From 4346836d8614b05a131d83a60f4f2e7464ed4505 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 7 Aug 2023 01:14:56 -0400 Subject: [PATCH 01/30] add south building 128 to ci (#691) Co-authored-by: senselessdev1 --- .github/scripts/download_single_benchmark.sh | 9 ++++++++- .github/scripts/execute_single_benchmark.sh | 4 ++++ .github/workflows/benchmark.yml | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/scripts/download_single_benchmark.sh b/.github/scripts/download_single_benchmark.sh index 65539c7f0..53a26c2b5 100755 --- a/.github/scripts/download_single_benchmark.sh +++ b/.github/scripts/download_single_benchmark.sh @@ -66,6 +66,10 @@ function download_and_unzip_dataset_files { elif [ "$DATASET_NAME" == "gerrard-hall-100" ]; then WGET_URL1=https://github.com/johnwlambert/gtsfm-datasets-mirror/releases/download/gerrard-hall-100/gerrard-hall-100.zip ZIP_FNAME=gerrard-hall-100.zip + + elif [ "$DATASET_NAME" == "south-building-128" ]; then + WGET_URL1=https://github.com/johnwlambert/gtsfm-datasets-mirror/releases/download/south-building-128/south-building-128.zip + ZIP_FNAME=south-building-128.zip fi # Download the data. @@ -132,7 +136,10 @@ function download_and_unzip_dataset_files { elif [ "$DATASET_NAME" == "gerrard-hall-100" ]; then unzip gerrard-hall-100.zip - ls -ltrh gerrard-hall-100 + + elif [ "$DATASET_NAME" == "south-building-128" ]; then + unzip south-building-128.zip + fi } diff --git a/.github/scripts/execute_single_benchmark.sh b/.github/scripts/execute_single_benchmark.sh index 8e2d7567b..bf0c27c02 100755 --- a/.github/scripts/execute_single_benchmark.sh +++ b/.github/scripts/execute_single_benchmark.sh @@ -30,6 +30,10 @@ elif [ "$DATASET_NAME" == "notre-dame-20" ]; then elif [ "$DATASET_NAME" == "gerrard-hall-100" ]; then IMAGES_DIR=gerrard-hall-100/images COLMAP_FILES_DIRPATH=gerrard-hall-100/colmap-3.7-sparse-txt-2023-07-27 +elif [ "$DATASET_NAME" == "south-building-128" ]; then + IMAGES_DIR=south-building-128/images + #COLMAP_FILES_DIRPATH=south-building-128/colmap-official-2016-10-05 + COLMAP_FILES_DIRPATH=south-building-128/colmap-2023-07-28-txt fi echo "Config: ${CONFIG_NAME}, Loader: ${LOADER_NAME}" diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d4c4ad940..80142cf0b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -25,7 +25,8 @@ jobs: [deep_front_end_astrovision, 2011205_rc3, 65, png, wget, astrovision, 1024, true], [sift_front_end, gerrard-hall-100, 0, jpg, wget, colmap-loader, 760, true], [deep_front_end, gerrard-hall-100, 0, jpg, wget, colmap-loader, 760, true], - [synthetic_front_end, tanks-and-temples-barn-410, 4, jpg, wget, tanks-and-temples, 1080, true], + [sift_front_end, south-building-128, 0, jpg, wget, colmap-loader, 760, true], + [deep_front_end, south-building-128, 0, jpg, wget, colmap-loader, 760, true], ] defaults: run: From 5351d659378772007a0ee54ae26d51ab6be99516 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sat, 12 Aug 2023 06:16:56 +0000 Subject: [PATCH 02/30] add timing metrics --- .../rotation/rotation_averaging_base.py | 70 +++- gtsfm/averaging/rotation/shonan.py | 1 + .../averaging/translation/averaging_1dsfm.py | 297 ++++++++++---- gtsfm/bundle/bundle_adjustment.py | 367 ++++++++++++++---- 4 files changed, 574 insertions(+), 161 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 5a04968d9..a933a7c46 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -3,6 +3,7 @@ Authors: Jing Wu, Ayush Baid """ import abc +import time from typing import Dict, List, Optional, Tuple import dask @@ -28,7 +29,10 @@ def get_ui_metadata() -> UiMetadata: return UiMetadata( display_name="Rotation Averaging", - input_products=("View-Graph Relative Rotations", "Relative Pose Priors"), + input_products=( + "View-Graph Relative Rotations", + "Relative Pose Priors", + ), output_products="Global Rotations", parent_plate="Sparse Reconstruction", ) @@ -54,7 +58,41 @@ def run_rotation_averaging( underconstrained system or ill-constrained system). """ - def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup: + def _run_rotation_averaging_base( + self, + num_images: int, + i2Ri1_dict: Dict[Tuple[int, int], Optional[Rot3]], + i1Ti2_priors: Dict[Tuple[int, int], PosePrior], + wTi_gt: List[Optional[Pose3]], + ) -> Tuple[List[Optional[Rot3]], GtsfmMetricsGroup]: + """Run rotation averaging and computes metrics. + + Args: + num_images: number of poses. + i2Ri1_dict: relative rotations as dictionary (i1, i2): i2Ri1. + i1Ti2_priors: priors on relative poses as dictionary(i1, i2): PosePrior on i1Ti2. + wTi_gt: ground truth global rotations to compare against. + + Returns: + Global rotations for each camera pose, i.e. wRi, as a list. The number of entries in the list is + `num_images`. The list may contain `None` where the global rotation could not be computed (either + underconstrained system or ill-constrained system). + Metrics on global rotations. + """ + start_time = time.time() + wRis = self.run_rotation_averaging( + num_images, i2Ri1_dict, i1Ti2_priors + ) + run_time = time.time() - start_time + + metrics = self.evaluate(wRis, wTi_gt) + metrics.add_metric(GtsfmMetric("total_duration_sec", run_time)) + + return wRis, metrics + + def evaluate( + self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]] + ) -> GtsfmMetricsGroup: """Evaluate the global rotations computed by the rotation averaging implementation. Args: @@ -66,17 +104,30 @@ def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pos Returns: Metrics on global rotations. """ - wRi_gt = [wTi.rotation() if wTi is not None else None for wTi in wTi_gt] + wRi_gt = [ + wTi.rotation() if wTi is not None else None for wTi in wTi_gt + ] if len(wRi_computed) != len(wRi_gt): - raise ValueError("Lengths of wRi_list and gt_wRi_list should be the same.") + raise ValueError( + "Lengths of wRi_list and gt_wRi_list should be the same." + ) wRi_aligned = comp_utils.align_rotations(wRi_gt, wRi_computed) metrics = [] - metrics.append(GtsfmMetric(name="num_rotations_computed", data=len([x for x in wRi_computed if x is not None]))) - metrics.append(metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt)) - return GtsfmMetricsGroup(name="rotation_averaging_metrics", metrics=metrics) + metrics.append( + GtsfmMetric( + name="num_rotations_computed", + data=len([x for x in wRi_computed if x is not None]), + ) + ) + metrics.append( + metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt) + ) + return GtsfmMetricsGroup( + name="rotation_averaging_metrics", metrics=metrics + ) def create_computation_graph( self, @@ -97,7 +148,8 @@ def create_computation_graph( global rotations wrapped using dask.delayed. """ - wRis = dask.delayed(self.run_rotation_averaging)(num_images, i2Ri1_graph, i1Ti2_priors) - metrics = dask.delayed(self.evaluate)(wRis, gt_wTi_list) + wRis, metrics = dask.delayed( + self._run_rotation_averaging_base, nout=2 + )(num_images, i2Ri1_graph, i1Ti2_priors) return wRis, metrics diff --git a/gtsfm/averaging/rotation/shonan.py b/gtsfm/averaging/rotation/shonan.py index a6025b8f2..6e7c7158e 100644 --- a/gtsfm/averaging/rotation/shonan.py +++ b/gtsfm/averaging/rotation/shonan.py @@ -10,6 +10,7 @@ Authors: Jing Wu, Ayush Baid, John Lambert """ +import time from typing import Dict, List, Optional, Set, Tuple import gtsam diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index a168b5e39..8455d2ee0 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -13,6 +13,7 @@ from collections import defaultdict from enum import Enum from typing import DefaultDict, Dict, List, Optional, Set, Tuple +import time import gtsam import numpy as np from gtsam import ( @@ -34,7 +35,9 @@ import gtsfm.utils.logger as logger_utils import gtsfm.utils.metrics as metrics_utils import gtsfm.utils.sampling as sampling_utils -from gtsfm.averaging.translation.translation_averaging_base import TranslationAveragingBase +from gtsfm.averaging.translation.translation_averaging_base import ( + TranslationAveragingBase, +) from gtsfm.common.pose_prior import PosePrior from gtsfm.common.sfm_track import SfmTrack2d from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup @@ -115,16 +118,31 @@ def __sample_projection_directions( """ num_measurements = len(w_i2Ui1_list) - if self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_INPUT_MEASUREMENTS: - num_samples = min(num_measurements, self._max_1dsfm_projection_directions) - sampled_indices = np.random.choice(num_measurements, num_samples, replace=False) + if ( + self._projection_sampling_method + == self.ProjectionSamplingMethod.SAMPLE_INPUT_MEASUREMENTS + ): + num_samples = min( + num_measurements, self._max_1dsfm_projection_directions + ) + sampled_indices = np.random.choice( + num_measurements, num_samples, replace=False + ) projections = [w_i2Ui1_list[idx] for idx in sampled_indices] - elif self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_WITH_INPUT_DENSITY: + elif ( + self._projection_sampling_method + == self.ProjectionSamplingMethod.SAMPLE_WITH_INPUT_DENSITY + ): projections = sampling_utils.sample_kde_directions( w_i2Ui1_list, num_samples=self._max_1dsfm_projection_directions ) - elif self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_WITH_UNIFORM_DENSITY: - projections = sampling_utils.sample_random_directions(num_samples=self._max_1dsfm_projection_directions) + elif ( + self._projection_sampling_method + == self.ProjectionSamplingMethod.SAMPLE_WITH_UNIFORM_DENSITY + ): + projections = sampling_utils.sample_random_directions( + num_samples=self._max_1dsfm_projection_directions + ) else: raise ValueError("Unsupported sampling method!") @@ -148,14 +166,20 @@ def _binary_measurements_from_dict( """ w_i1Ui2_measurements = BinaryMeasurementsUnit3() for (i1, i2), w_i2Ui1 in w_i2Ui1_dict.items(): - w_i1Ui2_measurements.append(BinaryMeasurementUnit3(C(i2), C(i1), w_i2Ui1, noise_model)) + w_i1Ui2_measurements.append( + BinaryMeasurementUnit3(C(i2), C(i1), w_i2Ui1, noise_model) + ) for (j, i), w_iUj in w_iUj_dict_tracks.items(): - w_i1Ui2_measurements.append(BinaryMeasurementUnit3(C(i), L(j), w_iUj, noise_model)) + w_i1Ui2_measurements.append( + BinaryMeasurementUnit3(C(i), L(j), w_iUj, noise_model) + ) return w_i1Ui2_measurements def _binary_measurements_from_priors( - self, i2Ti1_priors: Dict[Tuple[int, int], PosePrior], wRi_list: List[Rot3] + self, + i2Ti1_priors: Dict[Tuple[int, int], PosePrior], + wRi_list: List[Rot3], ) -> BinaryMeasurementsPoint3: """Converts the priors from relative Pose3 priors to relative Point3 measurements in world frame. @@ -208,12 +232,20 @@ def compute_inliers( """ # Sample directions for projection - combined_measurements = list(w_i2Ui1_dict.values()) + list(w_iUj_dict_tracks.values()) - projection_directions = self.__sample_projection_directions(combined_measurements) + combined_measurements = list(w_i2Ui1_dict.values()) + list( + w_iUj_dict_tracks.values() + ) + projection_directions = self.__sample_projection_directions( + combined_measurements + ) # Convert to measurements: map indexes to symbols. - dummy_noise_model = gtsam.noiseModel.Isotropic.Sigma(3, 1e-2) # MFAS does not use this. - w_i1Ui2_measurements = self._binary_measurements_from_dict(w_i2Ui1_dict, w_iUj_dict_tracks, dummy_noise_model) + dummy_noise_model = gtsam.noiseModel.Isotropic.Sigma( + 3, 1e-2 + ) # MFAS does not use this. + w_i1Ui2_measurements = self._binary_measurements_from_dict( + w_i2Ui1_dict, w_iUj_dict_tracks, dummy_noise_model + ) # Compute outlier weights using MFAS. # TODO(ayush): parallelize this step. @@ -224,14 +256,19 @@ def compute_inliers( logger.debug("Computed outlier weights using MFAS.") # Compute average outlier weight. - outlier_weights_sum: DefaultDict[Tuple[int, int], float] = defaultdict(float) + outlier_weights_sum: DefaultDict[Tuple[int, int], float] = defaultdict( + float + ) inliers = set() for outlier_weight_dict in outlier_weights: for w_i1Ui2 in w_i1Ui2_measurements: i1, i2 = w_i1Ui2.key1(), w_i1Ui2.key2() outlier_weights_sum[(i1, i2)] += outlier_weight_dict[(i1, i2)] for (i1, i2), weight_sum in outlier_weights_sum.items(): - if weight_sum / len(projection_directions) < OUTLIER_WEIGHT_THRESHOLD: + if ( + weight_sum / len(projection_directions) + < OUTLIER_WEIGHT_THRESHOLD + ): inliers.add((i1, i2)) # Filter outliers, index back from symbol to int. @@ -239,13 +276,16 @@ def compute_inliers( inlier_w_i2Ui1_dict = {} inlier_w_iUj_dict_tracks = {} inlier_cameras: Set[int] = set() - for (i1, i2) in w_i2Ui1_dict: - if (C(i2), C(i1)) in inliers: # there is a flip in indices from w_i2Ui1_dict to inliers. + for i1, i2 in w_i2Ui1_dict: + if ( + C(i2), + C(i1), + ) in inliers: # there is a flip in indices from w_i2Ui1_dict to inliers. inlier_w_i2Ui1_dict[(i1, i2)] = w_i2Ui1_dict[(i1, i2)] inlier_cameras.add(i1) inlier_cameras.add(i2) - for (j, i) in w_iUj_dict_tracks: + for j, i in w_iUj_dict_tracks: # Same as above, `inliers` contains symbols that are flipped - C(i), L(j). # Only add an inlier camera-track measurements if the camera has other camera-camera inliers. if (C(i), L(j)) in inliers and i in inlier_cameras: @@ -253,7 +293,9 @@ def compute_inliers( return inlier_w_i2Ui1_dict, inlier_w_iUj_dict_tracks, inlier_cameras - def __get_initial_values(self, wTi_initial: List[Optional[PosePrior]]) -> gtsam.Values: + def __get_initial_values( + self, wTi_initial: List[Optional[PosePrior]] + ) -> gtsam.Values: """Converts translations from a list of absolute poses to gtsam.Values for initialization. Args: @@ -294,17 +336,26 @@ def _select_tracks_for_averaging( """ filtered_tracks = [] for track in tracks: - valid_cameras_track = track.select_for_cameras(camera_idxs=valid_cameras) - if valid_cameras_track.number_measurements() < MIN_TRACK_MEASUREMENTS_FOR_AVERAGING: + valid_cameras_track = track.select_for_cameras( + camera_idxs=valid_cameras + ) + if ( + valid_cameras_track.number_measurements() + < MIN_TRACK_MEASUREMENTS_FOR_AVERAGING + ): continue filtered_tracks.append(valid_cameras_track) tracks_subset = [] # Number of measurements per camera that we still need to add. - num_remaining_measurements = {c: measurements_per_camera for c in valid_cameras} + num_remaining_measurements = { + c: measurements_per_camera for c in valid_cameras + } # Number of measurements added by each track. - improvement = [t.number_measurements() for t in filtered_tracks] # how much cover each track would add + improvement = [ + t.number_measurements() for t in filtered_tracks + ] # how much cover each track would add # preparation: make a lookup from camera to tracks in the camera camera_track_lookup = {c: [] for c in valid_cameras} @@ -314,7 +365,9 @@ def _select_tracks_for_averaging( measurement = track.measurement(j) camera_track_lookup[measurement.i].append(track_id) - for count in range(len(filtered_tracks)): # artificial limit to avoid infinite loop + for count in range( + len(filtered_tracks) + ): # artificial limit to avoid infinite loop # choose track that maximizes the heuristic best_track_id = np.argmax(improvement) if improvement[best_track_id] <= 0: @@ -326,13 +379,17 @@ def _select_tracks_for_averaging( for measurement in filtered_tracks[best_track_id].measurements: if num_remaining_measurements[measurement.i] == 0: continue - num_remaining_measurements[measurement.i] = num_remaining_measurements[measurement.i] - 1 + num_remaining_measurements[measurement.i] = ( + num_remaining_measurements[measurement.i] - 1 + ) # If this image just got covered the k'th time, it's done. # Lower the improvement for all tracks that see it. if num_remaining_measurements[measurement.i] == 0: for t in camera_track_lookup[measurement.i]: - improvement[t] = improvement[t] - 1 if improvement[t] > 0 else 0 + improvement[t] = ( + improvement[t] - 1 if improvement[t] > 0 else 0 + ) return tracks_subset @@ -359,11 +416,17 @@ def _get_landmark_directions( cam_idx = measurement.i if intrinsics[cam_idx] is None or wRi_list[cam_idx] is None: - raise ValueError("Camera intrinsics or rotation cannot be None for input track measurements") + raise ValueError( + "Camera intrinsics or rotation cannot be None for input track measurements" + ) measurement_xy = intrinsics[cam_idx].calibrate(measurement.uv) - measurement_homog = Point3(measurement_xy[0], measurement_xy[1], 1.0) - w_cam_U_track = Unit3(wRi_list[cam_idx].rotate(Unit3(measurement_homog).point3())) + measurement_homog = Point3( + measurement_xy[0], measurement_xy[1], 1.0 + ) + w_cam_U_track = Unit3( + wRi_list[cam_idx].rotate(Unit3(measurement_homog).point3()) + ) # Direction starts at camera, but first index is track_id. landmark_directions[(track_id, cam_idx)] = w_cam_U_track @@ -394,24 +457,36 @@ def __run_averaging( List of camera translations in world frame, with as many entries as the number of images. """ logger.info( - "Using %d track measurements and %d camera measurements", len(w_i2Ui1_dict_tracks), len(w_i2Ui1_dict) + "Using %d track measurements and %d camera measurements", + len(w_i2Ui1_dict_tracks), + len(w_i2Ui1_dict), ) - noise_model = gtsam.noiseModel.Isotropic.Sigma(NOISE_MODEL_DIMENSION, NOISE_MODEL_SIGMA) + noise_model = gtsam.noiseModel.Isotropic.Sigma( + NOISE_MODEL_DIMENSION, NOISE_MODEL_SIGMA + ) if self._robust_measurement_noise: huber_loss = gtsam.noiseModel.mEstimator.Huber.Create(HUBER_LOSS_K) - noise_model = gtsam.noiseModel.Robust.Create(huber_loss, noise_model) + noise_model = gtsam.noiseModel.Robust.Create( + huber_loss, noise_model + ) - w_i1Ui2_measurements = self._binary_measurements_from_dict(w_i2Ui1_dict, w_i2Ui1_dict_tracks, noise_model) + w_i1Ui2_measurements = self._binary_measurements_from_dict( + w_i2Ui1_dict, w_i2Ui1_dict_tracks, noise_model + ) # Run the optimizer. try: algorithm = TranslationRecovery() if len(i2Ti1_priors) > 0: # scale is ignored here. - w_i1ti2_priors = self._binary_measurements_from_priors(i2Ti1_priors, wRi_list) + w_i1ti2_priors = self._binary_measurements_from_priors( + i2Ti1_priors, wRi_list + ) wti_initial = self.__get_initial_values(absolute_pose_priors) - wti_values = algorithm.run(w_i1Ui2_measurements, 0.0, w_i1ti2_priors, wti_initial) + wti_values = algorithm.run( + w_i1Ui2_measurements, 0.0, w_i1ti2_priors, wti_initial + ) else: wti_values = algorithm.run(w_i1Ui2_measurements, scale_factor) except TypeError as e: @@ -434,7 +509,9 @@ def run_translation_averaging( i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], wRi_list: List[Optional[Rot3]], tracks_2d: Optional[List[SfmTrack2d]] = None, - intrinsics: Optional[List[Optional[gtsfm_types.CALIBRATION_TYPE]]] = None, + intrinsics: Optional[ + List[Optional[gtsfm_types.CALIBRATION_TYPE]] + ] = None, absolute_pose_priors: List[Optional[PosePrior]] = [], i2Ti1_priors: Dict[Tuple[int, int], PosePrior] = {}, scale_factor: float = 1.0, @@ -457,26 +534,45 @@ def run_translation_averaging( or ill-constrained system). A GtsfmMetricsGroup of 1DSfM metrics. """ - logger.info("Running translation averaging on %d unit translations", len(i2Ui1_dict)) + logger.info( + "Running translation averaging on %d unit translations", + len(i2Ui1_dict), + ) - w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame(i2Ui1_dict, wRi_list) + w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame( + i2Ui1_dict, wRi_list + ) + start_time = time.time() if self._use_tracks_for_averaging: if tracks_2d is None: - logger.info("No tracks provided for translation averaging. Falling back to camera unit translations.") + logger.info( + "No tracks provided for translation averaging. Falling back to camera unit translations." + ) w_i2Ui1_dict_tracks = {} elif intrinsics is None or len(intrinsics) != len(wRi_list): - raise ValueError("Number of intrinsics must match number of rotations when tracks are provided.") + raise ValueError( + "Number of intrinsics must match number of rotations when tracks are provided." + ) else: - selected_tracks = self._select_tracks_for_averaging(tracks_2d, valid_cameras, intrinsics) - w_i2Ui1_dict_tracks = self._get_landmark_directions(selected_tracks, intrinsics, wRi_list) + selected_tracks = self._select_tracks_for_averaging( + tracks_2d, valid_cameras, intrinsics + ) + w_i2Ui1_dict_tracks = self._get_landmark_directions( + selected_tracks, intrinsics, wRi_list + ) else: w_i2Ui1_dict_tracks = {} - w_i2Ui1_dict_inliers, w_i2Ui1_dict_tracks_inliers, inlier_cameras = self.compute_inliers( - w_i2Ui1_dict, w_i2Ui1_dict_tracks - ) + inlier_computation_start_time = time.time() + ( + w_i2Ui1_dict_inliers, + w_i2Ui1_dict_tracks_inliers, + inlier_cameras, + ) = self.compute_inliers(w_i2Ui1_dict, w_i2Ui1_dict_tracks) + inlier_computation_time = time.time() - inlier_computation_start_time + averaging_start_time = time.time() wti_list = self.__run_averaging( num_images=num_images, w_i2Ui1_dict=w_i2Ui1_dict_inliers, @@ -486,17 +582,41 @@ def run_translation_averaging( absolute_pose_priors=absolute_pose_priors, scale_factor=scale_factor, ) + averaging_time = time.time() - averaging_start_time # Compute the metrics. - ta_metrics = compute_metrics(set(w_i2Ui1_dict_inliers.keys()), i2Ui1_dict, wRi_list, wti_list, gt_wTi_list) + ta_metrics = compute_metrics( + set(w_i2Ui1_dict_inliers.keys()), + i2Ui1_dict, + wRi_list, + wti_list, + gt_wTi_list, + ) num_translations = sum([1 for wti in wti_list if wti is not None]) - logger.info("Estimated %d translations out of %d images.", num_translations, num_images) + logger.info( + "Estimated %d translations out of %d images.", + num_translations, + num_images, + ) # Combine estimated global rotations and global translations to Pose(3) objects. wTi_list = [ - Pose3(wRi, wti) if wRi is not None and wti is not None else None for wRi, wti in zip(wRi_list, wti_list) + Pose3(wRi, wti) if wRi is not None and wti is not None else None + for wRi, wti in zip(wRi_list, wti_list) ] + total_time = time.time() - start_time + logger.info("Translation averaging took %.4f seconds.", total_time) + ta_metrics.add_metric(GtsfmMetric("total_duration_sec", total_time)) + ta_metrics.add_metric( + GtsfmMetric( + "outier_rejection_duration_sec", inlier_computation_time + ) + ) + ta_metrics.add_metric( + GtsfmMetric("optimization_duration_sec", averaging_time) + ) + return wTi_list, ta_metrics @@ -521,56 +641,96 @@ def compute_metrics( - Distribution of translation direction angle for outlier measurements. """ outlier_i1_i2_pairs = ( - set([pair_idx for pair_idx, val in i2Ui1_dict.items() if val is not None]) - inlier_i1_i2_pairs + set( + [ + pair_idx + for pair_idx, val in i2Ui1_dict.items() + if val is not None + ] + ) + - inlier_i1_i2_pairs ) num_total_measurements = len(inlier_i1_i2_pairs) + len(outlier_i1_i2_pairs) ta_metrics = [ GtsfmMetric("num_total_1dsfm_measurements", num_total_measurements), GtsfmMetric("num_inlier_1dsfm_measurements", len(inlier_i1_i2_pairs)), - GtsfmMetric("num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs)), - GtsfmMetric("num_translations_estimated", len([wti for wti in wti_list if wti is not None])), + GtsfmMetric( + "num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs) + ), + GtsfmMetric( + "num_translations_estimated", + len([wti for wti in wti_list if wti is not None]), + ), ] # Remaining metrics require ground truth, so return if GT is not available. - gt_available = np.array([gt_wTi is not None for gt_wTi in gt_wTi_list]).any() + gt_available = np.array( + [gt_wTi is not None for gt_wTi in gt_wTi_list] + ).any() if not gt_available: return GtsfmMetricsGroup("translation_averaging_metrics", ta_metrics) # Get ground truth translation directions for the measurements. - gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions(gt_wTi_list) + gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions( + gt_wTi_list + ) # Angle between i2Ui1 measurement and GT i2Ui1 measurement for inliers and outliers. - inlier_angular_errors = metrics_utils.get_measurement_angle_errors(inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) - outlier_angular_errors = metrics_utils.get_measurement_angle_errors(outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) + inlier_angular_errors = metrics_utils.get_measurement_angle_errors( + inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict + ) + outlier_angular_errors = metrics_utils.get_measurement_angle_errors( + outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict + ) precision, recall = metrics_utils.get_precision_recall_from_errors( - inlier_angular_errors, outlier_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG + inlier_angular_errors, + outlier_angular_errors, + MAX_INLIER_MEASUREMENT_ERROR_DEG, ) measured_gt_i2Ui1_dict = {} - for (i1, i2) in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs): + for i1, i2 in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs): measured_gt_i2Ui1_dict[(i1, i2)] = gt_i2Ui1_dict[(i1, i2)] # Compute estimated poses after the averaging step and align them to ground truth. wTi_list: List[Optional[Pose3]] = [] - for (wRi, wti) in zip(wRi_list, wti_list): + for wRi, wti in zip(wRi_list, wti_list): if wRi is None or wti is None: wTi_list.append(None) else: wTi_list.append(Pose3(wRi, wti)) - wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing(gt_wTi_list, wTi_list) - wti_aligned_list = [wTi.translation() if wTi is not None else None for wTi in wTi_aligned_list] - gt_wti_list = [gt_wTi.translation() if gt_wTi is not None else None for gt_wTi in gt_wTi_list] + wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing( + gt_wTi_list, wTi_list + ) + wti_aligned_list = [ + wTi.translation() if wTi is not None else None + for wTi in wTi_aligned_list + ] + gt_wti_list = [ + gt_wTi.translation() if gt_wTi is not None else None + for gt_wTi in gt_wTi_list + ] threshold_suffix = str(int(MAX_INLIER_MEASUREMENT_ERROR_DEG)) + "_deg" ta_metrics.extend( [ GtsfmMetric("1dsfm_precision_" + threshold_suffix, precision), GtsfmMetric("1dsfm_recall_" + threshold_suffix, recall), - GtsfmMetric("1dsfm_inlier_angular_errors_deg", inlier_angular_errors), - GtsfmMetric("1dsfm_outlier_angular_errors_deg", outlier_angular_errors), - metrics_utils.compute_relative_translation_angle_metric(measured_gt_i2Ui1_dict, wTi_aligned_list), - metrics_utils.compute_translation_distance_metric(wti_aligned_list, gt_wti_list), - metrics_utils.compute_translation_angle_metric(gt_wTi_list, wTi_aligned_list), + GtsfmMetric( + "1dsfm_inlier_angular_errors_deg", inlier_angular_errors + ), + GtsfmMetric( + "1dsfm_outlier_angular_errors_deg", outlier_angular_errors + ), + metrics_utils.compute_relative_translation_angle_metric( + measured_gt_i2Ui1_dict, wTi_aligned_list + ), + metrics_utils.compute_translation_distance_metric( + wti_aligned_list, gt_wti_list + ), + metrics_utils.compute_translation_angle_metric( + gt_wTi_list, wTi_aligned_list + ), ] ) @@ -578,7 +738,8 @@ def compute_metrics( def get_valid_measurements_in_world_frame( - i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], wRi_list: List[Optional[Rot3]] + i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], + wRi_list: List[Optional[Rot3]], ) -> Tuple[RelativeDirectionsDict, Set[int]]: """Returns measurements for which both cameras have valid rotations, transformed to the world frame. diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 678ad538e..38ce18f99 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -3,6 +3,7 @@ Authors: Xiaolong Wu, John Lambert, Ayush Baid """ import logging +import time from collections import Counter from pathlib import Path from typing import Dict, List, Optional, Set, Tuple @@ -102,16 +103,26 @@ def __init__( def __map_to_calibration_variable(self, camera_idx: int) -> int: return 0 if self._shared_calib else camera_idx - def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration: bool) -> NonlinearFactorGraph: + def __reprojection_factors( + self, initial_data: GtsfmData, is_fisheye_calibration: bool + ) -> NonlinearFactorGraph: """Generate reprojection factors using the tracks.""" graph = NonlinearFactorGraph() # noise model for measurements -- one pixel in u and v - measurement_noise = gtsam.noiseModel.Isotropic.Sigma(IMG_MEASUREMENT_DIM, self._measurement_noise_sigma) + measurement_noise = gtsam.noiseModel.Isotropic.Sigma( + IMG_MEASUREMENT_DIM, self._measurement_noise_sigma + ) if self._robust_measurement_noise: - measurement_noise = gtsam.noiseModel.Robust(gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise) + measurement_noise = gtsam.noiseModel.Robust( + gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise + ) - sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler + sfm_factor_class = ( + GeneralSFMFactor2Cal3Fisheye + if is_fisheye_calibration + else GeneralSFMFactor2Cal3Bundler + ) for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack # retrieve the SfmMeasurement objects @@ -132,7 +143,9 @@ def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration return graph def _between_factors( - self, relative_pose_priors: Dict[Tuple[int, int], PosePrior], cameras_to_model: List[int] + self, + relative_pose_priors: Dict[Tuple[int, int], PosePrior], + cameras_to_model: List[int], ) -> NonlinearFactorGraph: """Generate BetweenFactors on relative poses for pose variables.""" graph = NonlinearFactorGraph() @@ -170,26 +183,42 @@ def __pose_priors( PriorFactorPose3( X(camera_for_origin), initial_data.get_camera(camera_for_origin).pose(), - gtsam.noiseModel.Isotropic.Sigma(CAM_POSE3_DOF, self._cam_pose3_prior_noise_sigma), + gtsam.noiseModel.Isotropic.Sigma( + CAM_POSE3_DOF, self._cam_pose3_prior_noise_sigma + ), ) ) return graph def __calibration_priors( - self, initial_data: GtsfmData, cameras_to_model: List[int], is_fisheye_calibration: bool + self, + initial_data: GtsfmData, + cameras_to_model: List[int], + is_fisheye_calibration: bool, ) -> NonlinearFactorGraph: """Generate prior factors on calibration parameters of the cameras.""" graph = NonlinearFactorGraph() - calibration_prior_factor_class = PriorFactorCal3Fisheye if is_fisheye_calibration else PriorFactorCal3Bundler - calibration_prior_factor_dof = CAM_CAL3FISHEYE_DOF if is_fisheye_calibration else CAM_CAL3BUNDLER_DOF + calibration_prior_factor_class = ( + PriorFactorCal3Fisheye + if is_fisheye_calibration + else PriorFactorCal3Bundler + ) + calibration_prior_factor_dof = ( + CAM_CAL3FISHEYE_DOF + if is_fisheye_calibration + else CAM_CAL3BUNDLER_DOF + ) if self._shared_calib: graph.push_back( calibration_prior_factor_class( K(self.__map_to_calibration_variable(cameras_to_model[0])), initial_data.get_camera(cameras_to_model[0]).calibration(), - gtsam.noiseModel.Isotropic.Sigma(calibration_prior_factor_dof, self._calibration_prior_noise_sigma), + gtsam.noiseModel.Isotropic.Sigma( + calibration_prior_factor_dof, + self._calibration_prior_noise_sigma, + ), ) ) else: @@ -199,7 +228,8 @@ def __calibration_priors( K(self.__map_to_calibration_variable(i)), initial_data.get_camera(i).calibration(), gtsam.noiseModel.Isotropic.Sigma( - calibration_prior_factor_dof, self._calibration_prior_noise_sigma + calibration_prior_factor_dof, + self._calibration_prior_noise_sigma, ), ) ) @@ -214,16 +244,25 @@ def __construct_factor_graph( relative_pose_priors: Dict[Tuple[int, int], PosePrior], ) -> NonlinearFactorGraph: """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors.""" - is_fisheye_calibration = isinstance(initial_data.get_camera(cameras_to_model[0]), PinholeCameraCal3Fisheye) + is_fisheye_calibration = isinstance( + initial_data.get_camera(cameras_to_model[0]), + PinholeCameraCal3Fisheye, + ) graph = NonlinearFactorGraph() # Create a factor graph graph.push_back( - self.__reprojection_factors(initial_data=initial_data, is_fisheye_calibration=is_fisheye_calibration) + self.__reprojection_factors( + initial_data=initial_data, + is_fisheye_calibration=is_fisheye_calibration, + ) ) graph.push_back( - self._between_factors(relative_pose_priors=relative_pose_priors, cameras_to_model=cameras_to_model) + self._between_factors( + relative_pose_priors=relative_pose_priors, + cameras_to_model=cameras_to_model, + ) ) graph.push_back( self.__pose_priors( @@ -232,12 +271,18 @@ def __construct_factor_graph( camera_for_origin=cameras_to_model[0], ) ) - graph.push_back(self.__calibration_priors(initial_data, cameras_to_model, is_fisheye_calibration)) + graph.push_back( + self.__calibration_priors( + initial_data, cameras_to_model, is_fisheye_calibration + ) + ) # Also add a prior on the position of the first landmark to fix the scale graph.push_back( gtsam.PriorFactorPoint3( - P(0), initial_data.get_track(0).point3(), gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1) + P(0), + initial_data.get_track(0).point3(), + gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1), ) ) @@ -253,7 +298,10 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: initial_values.insert(X(i), camera.pose()) if not self._shared_calib or loop_idx == 0: # add only one value if calibrations are shared - initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) + initial_values.insert( + K(self.__map_to_calibration_variable(i)), + camera.calibration(), + ) # add each SfmTrack for j in range(initial_data.number_tracks()): @@ -262,7 +310,9 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: return initial_values - def __optimize_factor_graph(self, graph: NonlinearFactorGraph, initial_values: Values) -> Values: + def __optimize_factor_graph( + self, graph: NonlinearFactorGraph, initial_values: Values + ) -> Values: """Optimize the factor graph.""" params = gtsam.LevenbergMarquardtParams() params.setVerbosityLM("ERROR") @@ -285,6 +335,60 @@ def __cameras_to_model( return sorted(list(cameras)) + def run_ba_step( + self, + initial_data: GtsfmData, + absolute_pose_priors: List[Optional[PosePrior]], + relative_pose_priors: Dict[Tuple[int, int], PosePrior], + reproj_error_thresh: Optional[float], + verbose: bool = True, + ): + cameras_to_model = self.__cameras_to_model( + initial_data, absolute_pose_priors, relative_pose_priors + ) + graph = self.__construct_factor_graph( + cameras_to_model=cameras_to_model, + initial_data=initial_data, + absolute_pose_priors=absolute_pose_priors, + relative_pose_priors=relative_pose_priors, + ) + initial_values = self.__initial_values(initial_data=initial_data) + result_values = self.__optimize_factor_graph(graph, initial_values) + + # Print error. + final_error = graph.error(result_values) + if verbose: + logger.info(f"initial error: {graph.error(initial_values):.2f}") + logger.info(f"final error: {final_error:.2f}") + + # Construct the results. + optimized_data = values_to_gtsfm_data( + result_values, initial_data, self._shared_calib + ) + + # Filter landmarks by reprojection error. + if reproj_error_thresh is not None: + if verbose: + logger.info( + "[Result] Number of tracks before filtering: %d", + optimized_data.number_tracks(), + ) + filtered_result, valid_mask = optimized_data.filter_landmarks( + reproj_error_thresh + ) + if verbose: + logger.info( + "[Result] Number of tracks after filtering: %d", + filtered_result.number_tracks(), + ) + + else: + valid_mask = [True] * optimized_data.number_tracks() + filtered_result = optimized_data + + # Set intermediate result as initial condition for next step. + return optimized_data, filtered_result, valid_mask, final_error + def run_ba( self, initial_data: GtsfmData, @@ -306,57 +410,33 @@ def run_ba( Valid mask as a list of booleans, indicating for each input track whether it was below the re-projection threshold. """ - logger.info( - "Input: %d tracks on %d cameras", initial_data.number_tracks(), len(initial_data.get_valid_camera_indices()) - ) - if initial_data.number_tracks() == 0 or len(initial_data.get_valid_camera_indices()) == 0: - # no cameras or tracks to optimize, so bundle adjustment is not possible - logger.error( - "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." - ) - return initial_data, initial_data, [False] * initial_data.number_tracks() - num_ba_steps = len(self._reproj_error_thresholds) - for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): - cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) - graph = self.__construct_factor_graph( - cameras_to_model=cameras_to_model, - initial_data=initial_data, - absolute_pose_priors=absolute_pose_priors, - relative_pose_priors=relative_pose_priors, + for step, reproj_error_thresh in enumerate( + self._reproj_error_thresholds + ): + ( + optimized_data, + filtered_result, + valid_mask, + final_error, + ) = self.run_ba_step( + initial_data, + absolute_pose_priors, + relative_pose_priors, + reproj_error_thresh, + verbose, ) - initial_values = self.__initial_values(initial_data=initial_data) - result_values = self.__optimize_factor_graph(graph, initial_values) - - # Print error. - final_error = graph.error(result_values) - if verbose: - logger.info(f"initial error: {graph.error(initial_values):.2f}") - logger.info(f"final error: {final_error:.2f}") - - # Construct the results. - optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib) - - # Filter landmarks by reprojection error. - if reproj_error_thresh is not None: - if verbose: - logger.info("[Result] Number of tracks before filtering: %d", optimized_data.number_tracks()) - filtered_result, valid_mask = optimized_data.filter_landmarks(reproj_error_thresh) - if verbose: - logger.info("[Result] Number of tracks after filtering: %d", filtered_result.number_tracks()) - - else: - valid_mask = [True] * optimized_data.number_tracks() - filtered_result = optimized_data - - # Set intermediate result as initial condition for next step. - initial_data = filtered_result # Print intermediate results. if num_ba_steps > 1: logger.info( "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" - % (step + 1, num_ba_steps, final_error, filtered_result.number_tracks()) + % ( + step + 1, + num_ba_steps, + final_error, + filtered_result.number_tracks(), + ) ) return optimized_data, filtered_result, valid_mask @@ -378,40 +458,141 @@ def evaluate( Metrics group containing metrics for both filtered and unfiltered BA results. """ ba_metrics = GtsfmMetricsGroup( - name=METRICS_GROUP, metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered") + name=METRICS_GROUP, + metrics=metrics_utils.get_stats_for_sfmdata( + unfiltered_data, suffix="_unfiltered" + ), ) - poses_gt = [cam.pose() if cam is not None else None for cam in cameras_gt] + poses_gt = [ + cam.pose() if cam is not None else None for cam in cameras_gt + ] valid_poses_gt_count = len(poses_gt) - poses_gt.count(None) if valid_poses_gt_count == 0: return ba_metrics # align the sparse multi-view estimate after BA to the ground truth pose graph. - aligned_filtered_data = filtered_data.align_via_Sim3_to_poses(wTi_list_ref=poses_gt) + aligned_filtered_data = filtered_data.align_via_Sim3_to_poses( + wTi_list_ref=poses_gt + ) ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics( - gt_wTi_list=poses_gt, ba_output=aligned_filtered_data, save_dir=save_dir + gt_wTi_list=poses_gt, + ba_output=aligned_filtered_data, + save_dir=save_dir, ) ba_metrics.extend(metrics_group=ba_pose_error_metrics) - output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras( - tracks=aligned_filtered_data.get_tracks(), cameras_gt=cameras_gt + output_tracks_exit_codes = ( + track_utils.classify_tracks3d_with_gt_cameras( + tracks=aligned_filtered_data.get_tracks(), + cameras_gt=cameras_gt, + ) + ) + output_tracks_exit_codes_distribution = Counter( + output_tracks_exit_codes ) - output_tracks_exit_codes_distribution = Counter(output_tracks_exit_codes) for exit_code, count in output_tracks_exit_codes_distribution.items(): - metric_name = "Filtered tracks triangulated with GT cams: {}".format(exit_code.name) + metric_name = ( + "Filtered tracks triangulated with GT cams: {}".format( + exit_code.name + ) + ) ba_metrics.add_metric(GtsfmMetric(name=metric_name, data=count)) - ba_metrics.add_metrics(metrics_utils.get_stats_for_sfmdata(aligned_filtered_data, suffix="_filtered")) + ba_metrics.add_metrics( + metrics_utils.get_stats_for_sfmdata( + aligned_filtered_data, suffix="_filtered" + ) + ) # ba_metrics.save_to_json(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json")) - logger.info("[Result] Mean track length %.3f", np.mean(aligned_filtered_data.get_track_lengths())) - logger.info("[Result] Median track length %.3f", np.median(aligned_filtered_data.get_track_lengths())) + logger.info( + "[Result] Mean track length %.3f", + np.mean(aligned_filtered_data.get_track_lengths()), + ) + logger.info( + "[Result] Median track length %.3f", + np.median(aligned_filtered_data.get_track_lengths()), + ) aligned_filtered_data.log_scene_reprojection_error_stats() return ba_metrics + def _run_ba_instrumented( + self, + initial_data: GtsfmData, + absolute_pose_priors: List[Optional[PosePrior]], + relative_pose_priors: Dict[Tuple[int, int], PosePrior], + cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]], + save_dir: Optional[str] = None, + verbose: bool = True, + ): + logger.info( + "Input: %d tracks on %d cameras", + initial_data.number_tracks(), + len(initial_data.get_valid_camera_indices()), + ) + if ( + initial_data.number_tracks() == 0 + or len(initial_data.get_valid_camera_indices()) == 0 + ): + # no cameras or tracks to optimize, so bundle adjustment is not possible + logger.error( + "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." + ) + return ( + initial_data, + initial_data, + [False] * initial_data.number_tracks(), + ) + step_times = [] + start_time = time.time() + + num_ba_steps = len(self._reproj_error_thresholds) + for step, reproj_error_thresh in enumerate( + self._reproj_error_thresholds + ): + step_start_time = time.time() + ( + optimized_data, + filtered_result, + valid_mask, + final_error, + ) = self.run_ba_step( + initial_data, + absolute_pose_priors, + relative_pose_priors, + reproj_error_thresh, + verbose, + ) + step_times.append(time.time() - step_start_time) + + # Print intermediate results. + if num_ba_steps > 1: + logger.info( + "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" + % ( + step + 1, + num_ba_steps, + final_error, + filtered_result.number_tracks(), + ) + ) + total_time = time.time() - start_time + + metrics = self.evaluate( + optimized_data, filtered_result, cameras_gt, save_dir + ) + for i, step_time in enumerate(step_times): + metrics.add_metric( + GtsfmMetric(f"step_{i}_run_duration_sec", step_time) + ) + metrics.add_metric(GtsfmMetric(f"total_run_duration_sec", total_time)) + + return optimized_data, filtered_result, valid_mask, metrics + def create_computation_graph( self, sfm_data_graph: Delayed, @@ -431,16 +612,22 @@ def create_computation_graph( GtsfmData aligned to GT (if provided), wrapped up using dask.delayed Metrics group for BA results, wrapped up using dask.delayed """ - optimized_sfm_data, filtered_sfm_data, _ = dask.delayed(self.run_ba, nout=3)( - sfm_data_graph, absolute_pose_priors, relative_pose_priors - ) - metrics_graph = dask.delayed(self.evaluate)( - optimized_sfm_data, filtered_sfm_data, cameras_gt, save_dir=save_dir + + _, filtered_sfm_data, _, metrics_graph = dask.delayed( + self._run_ba_instrumented, nout=4 + )( + sfm_data_graph, + absolute_pose_priors, + relative_pose_priors, + cameras_gt, + save_dir, ) return filtered_sfm_data, metrics_graph -def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData: +def values_to_gtsfm_data( + values: Values, initial_data: GtsfmData, shared_calib: bool +) -> GtsfmData: """Cast results from the optimization to GtsfmData object. Args: @@ -454,18 +641,30 @@ def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: """ result = GtsfmData(initial_data.number_images()) - is_fisheye_calibration = isinstance(initial_data.get_camera(0), PinholeCameraCal3Fisheye) + is_fisheye_calibration = isinstance( + initial_data.get_camera(0), PinholeCameraCal3Fisheye + ) if is_fisheye_calibration: - cal3_value_extraction_lambda = lambda i: values.atCal3Fisheye(K(0 if shared_calib else i)) + cal3_value_extraction_lambda = lambda i: values.atCal3Fisheye( + K(0 if shared_calib else i) + ) else: - cal3_value_extraction_lambda = lambda i: values.atCal3Bundler(K(0 if shared_calib else i)) - camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler + cal3_value_extraction_lambda = lambda i: values.atCal3Bundler( + K(0 if shared_calib else i) + ) + camera_class = ( + PinholeCameraCal3Fisheye + if is_fisheye_calibration + else PinholeCameraCal3Bundler + ) # add cameras for i in initial_data.get_valid_camera_indices(): result.add_camera( i, - camera_class(values.atPose3(X(i)), cal3_value_extraction_lambda(i)), + camera_class( + values.atPose3(X(i)), cal3_value_extraction_lambda(i) + ), ) # add tracks From 1959576823678950dc4be5d8bc0887edbab775d1 Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 14:38:42 -0400 Subject: [PATCH 03/30] Fix formatting --- .../rotation/rotation_averaging_base.py | 28 +-- .../averaging/translation/averaging_1dsfm.py | 225 +++++------------- gtsfm/bundle/bundle_adjustment.py | 157 +++--------- gtsfm/utils/metrics.py | 6 +- 4 files changed, 98 insertions(+), 318 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index a933a7c46..0697b7a96 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -80,9 +80,7 @@ def _run_rotation_averaging_base( Metrics on global rotations. """ start_time = time.time() - wRis = self.run_rotation_averaging( - num_images, i2Ri1_dict, i1Ti2_priors - ) + wRis = self.run_rotation_averaging(num_images, i2Ri1_dict, i1Ti2_priors) run_time = time.time() - start_time metrics = self.evaluate(wRis, wTi_gt) @@ -90,9 +88,7 @@ def _run_rotation_averaging_base( return wRis, metrics - def evaluate( - self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]] - ) -> GtsfmMetricsGroup: + def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup: """Evaluate the global rotations computed by the rotation averaging implementation. Args: @@ -104,14 +100,10 @@ def evaluate( Returns: Metrics on global rotations. """ - wRi_gt = [ - wTi.rotation() if wTi is not None else None for wTi in wTi_gt - ] + wRi_gt = [wTi.rotation() if wTi is not None else None for wTi in wTi_gt] if len(wRi_computed) != len(wRi_gt): - raise ValueError( - "Lengths of wRi_list and gt_wRi_list should be the same." - ) + raise ValueError("Lengths of wRi_list and gt_wRi_list should be the same.") wRi_aligned = comp_utils.align_rotations(wRi_gt, wRi_computed) @@ -122,12 +114,8 @@ def evaluate( data=len([x for x in wRi_computed if x is not None]), ) ) - metrics.append( - metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt) - ) - return GtsfmMetricsGroup( - name="rotation_averaging_metrics", metrics=metrics - ) + metrics.append(metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt)) + return GtsfmMetricsGroup(name="rotation_averaging_metrics", metrics=metrics) def create_computation_graph( self, @@ -148,8 +136,6 @@ def create_computation_graph( global rotations wrapped using dask.delayed. """ - wRis, metrics = dask.delayed( - self._run_rotation_averaging_base, nout=2 - )(num_images, i2Ri1_graph, i1Ti2_priors) + wRis, metrics = dask.delayed(self._run_rotation_averaging_base, nout=2)(num_images, i2Ri1_graph, i1Ti2_priors) return wRis, metrics diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index 8455d2ee0..83a5ecfd0 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -118,31 +118,16 @@ def __sample_projection_directions( """ num_measurements = len(w_i2Ui1_list) - if ( - self._projection_sampling_method - == self.ProjectionSamplingMethod.SAMPLE_INPUT_MEASUREMENTS - ): - num_samples = min( - num_measurements, self._max_1dsfm_projection_directions - ) - sampled_indices = np.random.choice( - num_measurements, num_samples, replace=False - ) + if self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_INPUT_MEASUREMENTS: + num_samples = min(num_measurements, self._max_1dsfm_projection_directions) + sampled_indices = np.random.choice(num_measurements, num_samples, replace=False) projections = [w_i2Ui1_list[idx] for idx in sampled_indices] - elif ( - self._projection_sampling_method - == self.ProjectionSamplingMethod.SAMPLE_WITH_INPUT_DENSITY - ): + elif self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_WITH_INPUT_DENSITY: projections = sampling_utils.sample_kde_directions( w_i2Ui1_list, num_samples=self._max_1dsfm_projection_directions ) - elif ( - self._projection_sampling_method - == self.ProjectionSamplingMethod.SAMPLE_WITH_UNIFORM_DENSITY - ): - projections = sampling_utils.sample_random_directions( - num_samples=self._max_1dsfm_projection_directions - ) + elif self._projection_sampling_method == self.ProjectionSamplingMethod.SAMPLE_WITH_UNIFORM_DENSITY: + projections = sampling_utils.sample_random_directions(num_samples=self._max_1dsfm_projection_directions) else: raise ValueError("Unsupported sampling method!") @@ -166,13 +151,9 @@ def _binary_measurements_from_dict( """ w_i1Ui2_measurements = BinaryMeasurementsUnit3() for (i1, i2), w_i2Ui1 in w_i2Ui1_dict.items(): - w_i1Ui2_measurements.append( - BinaryMeasurementUnit3(C(i2), C(i1), w_i2Ui1, noise_model) - ) + w_i1Ui2_measurements.append(BinaryMeasurementUnit3(C(i2), C(i1), w_i2Ui1, noise_model)) for (j, i), w_iUj in w_iUj_dict_tracks.items(): - w_i1Ui2_measurements.append( - BinaryMeasurementUnit3(C(i), L(j), w_iUj, noise_model) - ) + w_i1Ui2_measurements.append(BinaryMeasurementUnit3(C(i), L(j), w_iUj, noise_model)) return w_i1Ui2_measurements @@ -232,20 +213,12 @@ def compute_inliers( """ # Sample directions for projection - combined_measurements = list(w_i2Ui1_dict.values()) + list( - w_iUj_dict_tracks.values() - ) - projection_directions = self.__sample_projection_directions( - combined_measurements - ) + combined_measurements = list(w_i2Ui1_dict.values()) + list(w_iUj_dict_tracks.values()) + projection_directions = self.__sample_projection_directions(combined_measurements) # Convert to measurements: map indexes to symbols. - dummy_noise_model = gtsam.noiseModel.Isotropic.Sigma( - 3, 1e-2 - ) # MFAS does not use this. - w_i1Ui2_measurements = self._binary_measurements_from_dict( - w_i2Ui1_dict, w_iUj_dict_tracks, dummy_noise_model - ) + dummy_noise_model = gtsam.noiseModel.Isotropic.Sigma(3, 1e-2) # MFAS does not use this. + w_i1Ui2_measurements = self._binary_measurements_from_dict(w_i2Ui1_dict, w_iUj_dict_tracks, dummy_noise_model) # Compute outlier weights using MFAS. # TODO(ayush): parallelize this step. @@ -256,19 +229,14 @@ def compute_inliers( logger.debug("Computed outlier weights using MFAS.") # Compute average outlier weight. - outlier_weights_sum: DefaultDict[Tuple[int, int], float] = defaultdict( - float - ) + outlier_weights_sum: DefaultDict[Tuple[int, int], float] = defaultdict(float) inliers = set() for outlier_weight_dict in outlier_weights: for w_i1Ui2 in w_i1Ui2_measurements: i1, i2 = w_i1Ui2.key1(), w_i1Ui2.key2() outlier_weights_sum[(i1, i2)] += outlier_weight_dict[(i1, i2)] for (i1, i2), weight_sum in outlier_weights_sum.items(): - if ( - weight_sum / len(projection_directions) - < OUTLIER_WEIGHT_THRESHOLD - ): + if weight_sum / len(projection_directions) < OUTLIER_WEIGHT_THRESHOLD: inliers.add((i1, i2)) # Filter outliers, index back from symbol to int. @@ -293,9 +261,7 @@ def compute_inliers( return inlier_w_i2Ui1_dict, inlier_w_iUj_dict_tracks, inlier_cameras - def __get_initial_values( - self, wTi_initial: List[Optional[PosePrior]] - ) -> gtsam.Values: + def __get_initial_values(self, wTi_initial: List[Optional[PosePrior]]) -> gtsam.Values: """Converts translations from a list of absolute poses to gtsam.Values for initialization. Args: @@ -336,26 +302,17 @@ def _select_tracks_for_averaging( """ filtered_tracks = [] for track in tracks: - valid_cameras_track = track.select_for_cameras( - camera_idxs=valid_cameras - ) - if ( - valid_cameras_track.number_measurements() - < MIN_TRACK_MEASUREMENTS_FOR_AVERAGING - ): + valid_cameras_track = track.select_for_cameras(camera_idxs=valid_cameras) + if valid_cameras_track.number_measurements() < MIN_TRACK_MEASUREMENTS_FOR_AVERAGING: continue filtered_tracks.append(valid_cameras_track) tracks_subset = [] # Number of measurements per camera that we still need to add. - num_remaining_measurements = { - c: measurements_per_camera for c in valid_cameras - } + num_remaining_measurements = {c: measurements_per_camera for c in valid_cameras} # Number of measurements added by each track. - improvement = [ - t.number_measurements() for t in filtered_tracks - ] # how much cover each track would add + improvement = [t.number_measurements() for t in filtered_tracks] # how much cover each track would add # preparation: make a lookup from camera to tracks in the camera camera_track_lookup = {c: [] for c in valid_cameras} @@ -365,9 +322,7 @@ def _select_tracks_for_averaging( measurement = track.measurement(j) camera_track_lookup[measurement.i].append(track_id) - for count in range( - len(filtered_tracks) - ): # artificial limit to avoid infinite loop + for count in range(len(filtered_tracks)): # artificial limit to avoid infinite loop # choose track that maximizes the heuristic best_track_id = np.argmax(improvement) if improvement[best_track_id] <= 0: @@ -379,17 +334,13 @@ def _select_tracks_for_averaging( for measurement in filtered_tracks[best_track_id].measurements: if num_remaining_measurements[measurement.i] == 0: continue - num_remaining_measurements[measurement.i] = ( - num_remaining_measurements[measurement.i] - 1 - ) + num_remaining_measurements[measurement.i] = num_remaining_measurements[measurement.i] - 1 # If this image just got covered the k'th time, it's done. # Lower the improvement for all tracks that see it. if num_remaining_measurements[measurement.i] == 0: for t in camera_track_lookup[measurement.i]: - improvement[t] = ( - improvement[t] - 1 if improvement[t] > 0 else 0 - ) + improvement[t] = improvement[t] - 1 if improvement[t] > 0 else 0 return tracks_subset @@ -416,17 +367,11 @@ def _get_landmark_directions( cam_idx = measurement.i if intrinsics[cam_idx] is None or wRi_list[cam_idx] is None: - raise ValueError( - "Camera intrinsics or rotation cannot be None for input track measurements" - ) + raise ValueError("Camera intrinsics or rotation cannot be None for input track measurements") measurement_xy = intrinsics[cam_idx].calibrate(measurement.uv) - measurement_homog = Point3( - measurement_xy[0], measurement_xy[1], 1.0 - ) - w_cam_U_track = Unit3( - wRi_list[cam_idx].rotate(Unit3(measurement_homog).point3()) - ) + measurement_homog = Point3(measurement_xy[0], measurement_xy[1], 1.0) + w_cam_U_track = Unit3(wRi_list[cam_idx].rotate(Unit3(measurement_homog).point3())) # Direction starts at camera, but first index is track_id. landmark_directions[(track_id, cam_idx)] = w_cam_U_track @@ -462,31 +407,21 @@ def __run_averaging( len(w_i2Ui1_dict), ) - noise_model = gtsam.noiseModel.Isotropic.Sigma( - NOISE_MODEL_DIMENSION, NOISE_MODEL_SIGMA - ) + noise_model = gtsam.noiseModel.Isotropic.Sigma(NOISE_MODEL_DIMENSION, NOISE_MODEL_SIGMA) if self._robust_measurement_noise: huber_loss = gtsam.noiseModel.mEstimator.Huber.Create(HUBER_LOSS_K) - noise_model = gtsam.noiseModel.Robust.Create( - huber_loss, noise_model - ) + noise_model = gtsam.noiseModel.Robust.Create(huber_loss, noise_model) - w_i1Ui2_measurements = self._binary_measurements_from_dict( - w_i2Ui1_dict, w_i2Ui1_dict_tracks, noise_model - ) + w_i1Ui2_measurements = self._binary_measurements_from_dict(w_i2Ui1_dict, w_i2Ui1_dict_tracks, noise_model) # Run the optimizer. try: algorithm = TranslationRecovery() if len(i2Ti1_priors) > 0: # scale is ignored here. - w_i1ti2_priors = self._binary_measurements_from_priors( - i2Ti1_priors, wRi_list - ) + w_i1ti2_priors = self._binary_measurements_from_priors(i2Ti1_priors, wRi_list) wti_initial = self.__get_initial_values(absolute_pose_priors) - wti_values = algorithm.run( - w_i1Ui2_measurements, 0.0, w_i1ti2_priors, wti_initial - ) + wti_values = algorithm.run(w_i1Ui2_measurements, 0.0, w_i1ti2_priors, wti_initial) else: wti_values = algorithm.run(w_i1Ui2_measurements, scale_factor) except TypeError as e: @@ -509,9 +444,7 @@ def run_translation_averaging( i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], wRi_list: List[Optional[Rot3]], tracks_2d: Optional[List[SfmTrack2d]] = None, - intrinsics: Optional[ - List[Optional[gtsfm_types.CALIBRATION_TYPE]] - ] = None, + intrinsics: Optional[List[Optional[gtsfm_types.CALIBRATION_TYPE]]] = None, absolute_pose_priors: List[Optional[PosePrior]] = [], i2Ti1_priors: Dict[Tuple[int, int], PosePrior] = {}, scale_factor: float = 1.0, @@ -539,28 +472,18 @@ def run_translation_averaging( len(i2Ui1_dict), ) - w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame( - i2Ui1_dict, wRi_list - ) + w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame(i2Ui1_dict, wRi_list) start_time = time.time() if self._use_tracks_for_averaging: if tracks_2d is None: - logger.info( - "No tracks provided for translation averaging. Falling back to camera unit translations." - ) + logger.info("No tracks provided for translation averaging. Falling back to camera unit translations.") w_i2Ui1_dict_tracks = {} elif intrinsics is None or len(intrinsics) != len(wRi_list): - raise ValueError( - "Number of intrinsics must match number of rotations when tracks are provided." - ) + raise ValueError("Number of intrinsics must match number of rotations when tracks are provided.") else: - selected_tracks = self._select_tracks_for_averaging( - tracks_2d, valid_cameras, intrinsics - ) - w_i2Ui1_dict_tracks = self._get_landmark_directions( - selected_tracks, intrinsics, wRi_list - ) + selected_tracks = self._select_tracks_for_averaging(tracks_2d, valid_cameras, intrinsics) + w_i2Ui1_dict_tracks = self._get_landmark_directions(selected_tracks, intrinsics, wRi_list) else: w_i2Ui1_dict_tracks = {} @@ -602,20 +525,13 @@ def run_translation_averaging( # Combine estimated global rotations and global translations to Pose(3) objects. wTi_list = [ - Pose3(wRi, wti) if wRi is not None and wti is not None else None - for wRi, wti in zip(wRi_list, wti_list) + Pose3(wRi, wti) if wRi is not None and wti is not None else None for wRi, wti in zip(wRi_list, wti_list) ] total_time = time.time() - start_time logger.info("Translation averaging took %.4f seconds.", total_time) ta_metrics.add_metric(GtsfmMetric("total_duration_sec", total_time)) - ta_metrics.add_metric( - GtsfmMetric( - "outier_rejection_duration_sec", inlier_computation_time - ) - ) - ta_metrics.add_metric( - GtsfmMetric("optimization_duration_sec", averaging_time) - ) + ta_metrics.add_metric(GtsfmMetric("outier_rejection_duration_sec", inlier_computation_time)) + ta_metrics.add_metric(GtsfmMetric("optimization_duration_sec", averaging_time)) return wTi_list, ta_metrics @@ -641,22 +557,13 @@ def compute_metrics( - Distribution of translation direction angle for outlier measurements. """ outlier_i1_i2_pairs = ( - set( - [ - pair_idx - for pair_idx, val in i2Ui1_dict.items() - if val is not None - ] - ) - - inlier_i1_i2_pairs + set([pair_idx for pair_idx, val in i2Ui1_dict.items() if val is not None]) - inlier_i1_i2_pairs ) num_total_measurements = len(inlier_i1_i2_pairs) + len(outlier_i1_i2_pairs) ta_metrics = [ GtsfmMetric("num_total_1dsfm_measurements", num_total_measurements), GtsfmMetric("num_inlier_1dsfm_measurements", len(inlier_i1_i2_pairs)), - GtsfmMetric( - "num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs) - ), + GtsfmMetric("num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs)), GtsfmMetric( "num_translations_estimated", len([wti for wti in wti_list if wti is not None]), @@ -664,24 +571,16 @@ def compute_metrics( ] # Remaining metrics require ground truth, so return if GT is not available. - gt_available = np.array( - [gt_wTi is not None for gt_wTi in gt_wTi_list] - ).any() + gt_available = np.array([gt_wTi is not None for gt_wTi in gt_wTi_list]).any() if not gt_available: return GtsfmMetricsGroup("translation_averaging_metrics", ta_metrics) # Get ground truth translation directions for the measurements. - gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions( - gt_wTi_list - ) + gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions(gt_wTi_list) # Angle between i2Ui1 measurement and GT i2Ui1 measurement for inliers and outliers. - inlier_angular_errors = metrics_utils.get_measurement_angle_errors( - inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict - ) - outlier_angular_errors = metrics_utils.get_measurement_angle_errors( - outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict - ) + inlier_angular_errors = metrics_utils.get_measurement_angle_errors(inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) + outlier_angular_errors = metrics_utils.get_measurement_angle_errors(outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) precision, recall = metrics_utils.get_precision_recall_from_errors( inlier_angular_errors, outlier_angular_errors, @@ -699,38 +598,20 @@ def compute_metrics( wTi_list.append(None) else: wTi_list.append(Pose3(wRi, wti)) - wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing( - gt_wTi_list, wTi_list - ) - wti_aligned_list = [ - wTi.translation() if wTi is not None else None - for wTi in wTi_aligned_list - ] - gt_wti_list = [ - gt_wTi.translation() if gt_wTi is not None else None - for gt_wTi in gt_wTi_list - ] + wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing(gt_wTi_list, wTi_list) + wti_aligned_list = [wTi.translation() if wTi is not None else None for wTi in wTi_aligned_list] + gt_wti_list = [gt_wTi.translation() if gt_wTi is not None else None for gt_wTi in gt_wTi_list] threshold_suffix = str(int(MAX_INLIER_MEASUREMENT_ERROR_DEG)) + "_deg" ta_metrics.extend( [ GtsfmMetric("1dsfm_precision_" + threshold_suffix, precision), GtsfmMetric("1dsfm_recall_" + threshold_suffix, recall), - GtsfmMetric( - "1dsfm_inlier_angular_errors_deg", inlier_angular_errors - ), - GtsfmMetric( - "1dsfm_outlier_angular_errors_deg", outlier_angular_errors - ), - metrics_utils.compute_relative_translation_angle_metric( - measured_gt_i2Ui1_dict, wTi_aligned_list - ), - metrics_utils.compute_translation_distance_metric( - wti_aligned_list, gt_wti_list - ), - metrics_utils.compute_translation_angle_metric( - gt_wTi_list, wTi_aligned_list - ), + GtsfmMetric("1dsfm_inlier_angular_errors_deg", inlier_angular_errors), + GtsfmMetric("1dsfm_outlier_angular_errors_deg", outlier_angular_errors), + metrics_utils.compute_relative_translation_angle_metric(measured_gt_i2Ui1_dict, wTi_aligned_list), + metrics_utils.compute_translation_distance_metric(wti_aligned_list, gt_wti_list), + metrics_utils.compute_translation_angle_metric(gt_wTi_list, wTi_aligned_list), ] ) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 38ce18f99..77a7acce8 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -103,26 +103,16 @@ def __init__( def __map_to_calibration_variable(self, camera_idx: int) -> int: return 0 if self._shared_calib else camera_idx - def __reprojection_factors( - self, initial_data: GtsfmData, is_fisheye_calibration: bool - ) -> NonlinearFactorGraph: + def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration: bool) -> NonlinearFactorGraph: """Generate reprojection factors using the tracks.""" graph = NonlinearFactorGraph() # noise model for measurements -- one pixel in u and v - measurement_noise = gtsam.noiseModel.Isotropic.Sigma( - IMG_MEASUREMENT_DIM, self._measurement_noise_sigma - ) + measurement_noise = gtsam.noiseModel.Isotropic.Sigma(IMG_MEASUREMENT_DIM, self._measurement_noise_sigma) if self._robust_measurement_noise: - measurement_noise = gtsam.noiseModel.Robust( - gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise - ) + measurement_noise = gtsam.noiseModel.Robust(gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise) - sfm_factor_class = ( - GeneralSFMFactor2Cal3Fisheye - if is_fisheye_calibration - else GeneralSFMFactor2Cal3Bundler - ) + sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack # retrieve the SfmMeasurement objects @@ -183,9 +173,7 @@ def __pose_priors( PriorFactorPose3( X(camera_for_origin), initial_data.get_camera(camera_for_origin).pose(), - gtsam.noiseModel.Isotropic.Sigma( - CAM_POSE3_DOF, self._cam_pose3_prior_noise_sigma - ), + gtsam.noiseModel.Isotropic.Sigma(CAM_POSE3_DOF, self._cam_pose3_prior_noise_sigma), ) ) @@ -200,16 +188,8 @@ def __calibration_priors( """Generate prior factors on calibration parameters of the cameras.""" graph = NonlinearFactorGraph() - calibration_prior_factor_class = ( - PriorFactorCal3Fisheye - if is_fisheye_calibration - else PriorFactorCal3Bundler - ) - calibration_prior_factor_dof = ( - CAM_CAL3FISHEYE_DOF - if is_fisheye_calibration - else CAM_CAL3BUNDLER_DOF - ) + calibration_prior_factor_class = PriorFactorCal3Fisheye if is_fisheye_calibration else PriorFactorCal3Bundler + calibration_prior_factor_dof = CAM_CAL3FISHEYE_DOF if is_fisheye_calibration else CAM_CAL3BUNDLER_DOF if self._shared_calib: graph.push_back( calibration_prior_factor_class( @@ -271,11 +251,7 @@ def __construct_factor_graph( camera_for_origin=cameras_to_model[0], ) ) - graph.push_back( - self.__calibration_priors( - initial_data, cameras_to_model, is_fisheye_calibration - ) - ) + graph.push_back(self.__calibration_priors(initial_data, cameras_to_model, is_fisheye_calibration)) # Also add a prior on the position of the first landmark to fix the scale graph.push_back( @@ -310,9 +286,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: return initial_values - def __optimize_factor_graph( - self, graph: NonlinearFactorGraph, initial_values: Values - ) -> Values: + def __optimize_factor_graph(self, graph: NonlinearFactorGraph, initial_values: Values) -> Values: """Optimize the factor graph.""" params = gtsam.LevenbergMarquardtParams() params.setVerbosityLM("ERROR") @@ -343,9 +317,7 @@ def run_ba_step( reproj_error_thresh: Optional[float], verbose: bool = True, ): - cameras_to_model = self.__cameras_to_model( - initial_data, absolute_pose_priors, relative_pose_priors - ) + cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) graph = self.__construct_factor_graph( cameras_to_model=cameras_to_model, initial_data=initial_data, @@ -362,9 +334,7 @@ def run_ba_step( logger.info(f"final error: {final_error:.2f}") # Construct the results. - optimized_data = values_to_gtsfm_data( - result_values, initial_data, self._shared_calib - ) + optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib) # Filter landmarks by reprojection error. if reproj_error_thresh is not None: @@ -373,9 +343,7 @@ def run_ba_step( "[Result] Number of tracks before filtering: %d", optimized_data.number_tracks(), ) - filtered_result, valid_mask = optimized_data.filter_landmarks( - reproj_error_thresh - ) + filtered_result, valid_mask = optimized_data.filter_landmarks(reproj_error_thresh) if verbose: logger.info( "[Result] Number of tracks after filtering: %d", @@ -411,15 +379,8 @@ def run_ba( threshold. """ num_ba_steps = len(self._reproj_error_thresholds) - for step, reproj_error_thresh in enumerate( - self._reproj_error_thresholds - ): - ( - optimized_data, - filtered_result, - valid_mask, - final_error, - ) = self.run_ba_step( + for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): + (optimized_data, filtered_result, valid_mask, final_error,) = self.run_ba_step( initial_data, absolute_pose_priors, relative_pose_priors, @@ -459,23 +420,17 @@ def evaluate( """ ba_metrics = GtsfmMetricsGroup( name=METRICS_GROUP, - metrics=metrics_utils.get_stats_for_sfmdata( - unfiltered_data, suffix="_unfiltered" - ), + metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered"), ) - poses_gt = [ - cam.pose() if cam is not None else None for cam in cameras_gt - ] + poses_gt = [cam.pose() if cam is not None else None for cam in cameras_gt] valid_poses_gt_count = len(poses_gt) - poses_gt.count(None) if valid_poses_gt_count == 0: return ba_metrics # align the sparse multi-view estimate after BA to the ground truth pose graph. - aligned_filtered_data = filtered_data.align_via_Sim3_to_poses( - wTi_list_ref=poses_gt - ) + aligned_filtered_data = filtered_data.align_via_Sim3_to_poses(wTi_list_ref=poses_gt) ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics( gt_wTi_list=poses_gt, ba_output=aligned_filtered_data, @@ -483,29 +438,17 @@ def evaluate( ) ba_metrics.extend(metrics_group=ba_pose_error_metrics) - output_tracks_exit_codes = ( - track_utils.classify_tracks3d_with_gt_cameras( - tracks=aligned_filtered_data.get_tracks(), - cameras_gt=cameras_gt, - ) - ) - output_tracks_exit_codes_distribution = Counter( - output_tracks_exit_codes + output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras( + tracks=aligned_filtered_data.get_tracks(), + cameras_gt=cameras_gt, ) + output_tracks_exit_codes_distribution = Counter(output_tracks_exit_codes) for exit_code, count in output_tracks_exit_codes_distribution.items(): - metric_name = ( - "Filtered tracks triangulated with GT cams: {}".format( - exit_code.name - ) - ) + metric_name = "Filtered tracks triangulated with GT cams: {}".format(exit_code.name) ba_metrics.add_metric(GtsfmMetric(name=metric_name, data=count)) - ba_metrics.add_metrics( - metrics_utils.get_stats_for_sfmdata( - aligned_filtered_data, suffix="_filtered" - ) - ) + ba_metrics.add_metrics(metrics_utils.get_stats_for_sfmdata(aligned_filtered_data, suffix="_filtered")) # ba_metrics.save_to_json(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json")) logger.info( @@ -534,10 +477,7 @@ def _run_ba_instrumented( initial_data.number_tracks(), len(initial_data.get_valid_camera_indices()), ) - if ( - initial_data.number_tracks() == 0 - or len(initial_data.get_valid_camera_indices()) == 0 - ): + if initial_data.number_tracks() == 0 or len(initial_data.get_valid_camera_indices()) == 0: # no cameras or tracks to optimize, so bundle adjustment is not possible logger.error( "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." @@ -551,16 +491,9 @@ def _run_ba_instrumented( start_time = time.time() num_ba_steps = len(self._reproj_error_thresholds) - for step, reproj_error_thresh in enumerate( - self._reproj_error_thresholds - ): + for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): step_start_time = time.time() - ( - optimized_data, - filtered_result, - valid_mask, - final_error, - ) = self.run_ba_step( + (optimized_data, filtered_result, valid_mask, final_error,) = self.run_ba_step( initial_data, absolute_pose_priors, relative_pose_priors, @@ -582,13 +515,9 @@ def _run_ba_instrumented( ) total_time = time.time() - start_time - metrics = self.evaluate( - optimized_data, filtered_result, cameras_gt, save_dir - ) + metrics = self.evaluate(optimized_data, filtered_result, cameras_gt, save_dir) for i, step_time in enumerate(step_times): - metrics.add_metric( - GtsfmMetric(f"step_{i}_run_duration_sec", step_time) - ) + metrics.add_metric(GtsfmMetric(f"step_{i}_run_duration_sec", step_time)) metrics.add_metric(GtsfmMetric(f"total_run_duration_sec", total_time)) return optimized_data, filtered_result, valid_mask, metrics @@ -613,9 +542,7 @@ def create_computation_graph( Metrics group for BA results, wrapped up using dask.delayed """ - _, filtered_sfm_data, _, metrics_graph = dask.delayed( - self._run_ba_instrumented, nout=4 - )( + _, filtered_sfm_data, _, metrics_graph = dask.delayed(self._run_ba_instrumented, nout=4)( sfm_data_graph, absolute_pose_priors, relative_pose_priors, @@ -625,9 +552,7 @@ def create_computation_graph( return filtered_sfm_data, metrics_graph -def values_to_gtsfm_data( - values: Values, initial_data: GtsfmData, shared_calib: bool -) -> GtsfmData: +def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData: """Cast results from the optimization to GtsfmData object. Args: @@ -641,30 +566,18 @@ def values_to_gtsfm_data( """ result = GtsfmData(initial_data.number_images()) - is_fisheye_calibration = isinstance( - initial_data.get_camera(0), PinholeCameraCal3Fisheye - ) + is_fisheye_calibration = isinstance(initial_data.get_camera(0), PinholeCameraCal3Fisheye) if is_fisheye_calibration: - cal3_value_extraction_lambda = lambda i: values.atCal3Fisheye( - K(0 if shared_calib else i) - ) + cal3_value_extraction_lambda = lambda i: values.atCal3Fisheye(K(0 if shared_calib else i)) else: - cal3_value_extraction_lambda = lambda i: values.atCal3Bundler( - K(0 if shared_calib else i) - ) - camera_class = ( - PinholeCameraCal3Fisheye - if is_fisheye_calibration - else PinholeCameraCal3Bundler - ) + cal3_value_extraction_lambda = lambda i: values.atCal3Bundler(K(0 if shared_calib else i)) + camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler # add cameras for i in initial_data.get_valid_camera_indices(): result.add_camera( i, - camera_class( - values.atPose3(X(i)), cal3_value_extraction_lambda(i) - ), + camera_class(values.atPose3(X(i)), cal3_value_extraction_lambda(i)), ) # add tracks diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index 2777cae6e..5c08c2177 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -535,9 +535,9 @@ def pose_auc( if save_plot: if save_dir is None: raise ValueError("If `save_plot` is True, then `save_dir` must be provided.") - _ = plt.figure(dpi=200, facecolor='white') - plt.style.use('ggplot') - sns.set_style({'font.family': 'Times New Roman'}) + _ = plt.figure(dpi=200, facecolor="white") + plt.style.use("ggplot") + sns.set_style({"font.family": "Times New Roman"}) plt.scatter(e, r, 20, color="k", marker=".") plt.plot(e, r, color="r") plt.ylabel("Recall") From e6f8b581cd4eb28a57e69aeb509e0b56b7ac0de1 Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 14:43:42 -0400 Subject: [PATCH 04/30] Remove formatting changes --- .../rotation/rotation_averaging_base.py | 5 +-- .../averaging/translation/averaging_1dsfm.py | 43 +++++-------------- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 0697b7a96..8d790584f 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -29,10 +29,7 @@ def get_ui_metadata() -> UiMetadata: return UiMetadata( display_name="Rotation Averaging", - input_products=( - "View-Graph Relative Rotations", - "Relative Pose Priors", - ), + input_products=("View-Graph Relative Rotations", "Relative Pose Priors"), output_products="Global Rotations", parent_plate="Sparse Reconstruction", ) diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index 83a5ecfd0..1419f037d 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -35,9 +35,7 @@ import gtsfm.utils.logger as logger_utils import gtsfm.utils.metrics as metrics_utils import gtsfm.utils.sampling as sampling_utils -from gtsfm.averaging.translation.translation_averaging_base import ( - TranslationAveragingBase, -) +from gtsfm.averaging.translation.translation_averaging_base import TranslationAveragingBase from gtsfm.common.pose_prior import PosePrior from gtsfm.common.sfm_track import SfmTrack2d from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup @@ -467,10 +465,7 @@ def run_translation_averaging( or ill-constrained system). A GtsfmMetricsGroup of 1DSfM metrics. """ - logger.info( - "Running translation averaging on %d unit translations", - len(i2Ui1_dict), - ) + logger.info("Running translation averaging on %d unit translations", len(i2Ui1_dict)) w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame(i2Ui1_dict, wRi_list) @@ -488,11 +483,9 @@ def run_translation_averaging( w_i2Ui1_dict_tracks = {} inlier_computation_start_time = time.time() - ( - w_i2Ui1_dict_inliers, - w_i2Ui1_dict_tracks_inliers, - inlier_cameras, - ) = self.compute_inliers(w_i2Ui1_dict, w_i2Ui1_dict_tracks) + (w_i2Ui1_dict_inliers, w_i2Ui1_dict_tracks_inliers, inlier_cameras) = self.compute_inliers( + w_i2Ui1_dict, w_i2Ui1_dict_tracks + ) inlier_computation_time = time.time() - inlier_computation_start_time averaging_start_time = time.time() @@ -508,20 +501,10 @@ def run_translation_averaging( averaging_time = time.time() - averaging_start_time # Compute the metrics. - ta_metrics = compute_metrics( - set(w_i2Ui1_dict_inliers.keys()), - i2Ui1_dict, - wRi_list, - wti_list, - gt_wTi_list, - ) + ta_metrics = compute_metrics(set(w_i2Ui1_dict_inliers.keys()), i2Ui1_dict, wRi_list, wti_list, gt_wTi_list) num_translations = sum([1 for wti in wti_list if wti is not None]) - logger.info( - "Estimated %d translations out of %d images.", - num_translations, - num_images, - ) + logger.info("Estimated %d translations out of %d images.", num_translations, num_images) # Combine estimated global rotations and global translations to Pose(3) objects. wTi_list = [ @@ -564,10 +547,7 @@ def compute_metrics( GtsfmMetric("num_total_1dsfm_measurements", num_total_measurements), GtsfmMetric("num_inlier_1dsfm_measurements", len(inlier_i1_i2_pairs)), GtsfmMetric("num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs)), - GtsfmMetric( - "num_translations_estimated", - len([wti for wti in wti_list if wti is not None]), - ), + GtsfmMetric("num_translations_estimated", len([wti for wti in wti_list if wti is not None])), ] # Remaining metrics require ground truth, so return if GT is not available. @@ -582,9 +562,7 @@ def compute_metrics( inlier_angular_errors = metrics_utils.get_measurement_angle_errors(inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) outlier_angular_errors = metrics_utils.get_measurement_angle_errors(outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict) precision, recall = metrics_utils.get_precision_recall_from_errors( - inlier_angular_errors, - outlier_angular_errors, - MAX_INLIER_MEASUREMENT_ERROR_DEG, + inlier_angular_errors, outlier_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG ) measured_gt_i2Ui1_dict = {} @@ -619,8 +597,7 @@ def compute_metrics( def get_valid_measurements_in_world_frame( - i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], - wRi_list: List[Optional[Rot3]], + i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], wRi_list: List[Optional[Rot3]] ) -> Tuple[RelativeDirectionsDict, Set[int]]: """Returns measurements for which both cameras have valid rotations, transformed to the world frame. From 33c3318525516b2b8b73fa0cca083dc2ffbb0d6c Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 15:59:03 -0400 Subject: [PATCH 05/30] fix formatting --- .../rotation/rotation_averaging_base.py | 7 +----- .../averaging/translation/averaging_1dsfm.py | 13 +++-------- gtsfm/bundle/bundle_adjustment.py | 23 ++++++++++--------- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 8d790584f..30ca629ed 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -105,12 +105,7 @@ def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pos wRi_aligned = comp_utils.align_rotations(wRi_gt, wRi_computed) metrics = [] - metrics.append( - GtsfmMetric( - name="num_rotations_computed", - data=len([x for x in wRi_computed if x is not None]), - ) - ) + metrics.append(GtsfmMetric(name="num_rotations_computed", data=len([x for x in wRi_computed if x is not None]))) metrics.append(metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt)) return GtsfmMetricsGroup(name="rotation_averaging_metrics", metrics=metrics) diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index 1419f037d..4da039b7c 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -156,9 +156,7 @@ def _binary_measurements_from_dict( return w_i1Ui2_measurements def _binary_measurements_from_priors( - self, - i2Ti1_priors: Dict[Tuple[int, int], PosePrior], - wRi_list: List[Rot3], + self, i2Ti1_priors: Dict[Tuple[int, int], PosePrior], wRi_list: List[Rot3] ) -> BinaryMeasurementsPoint3: """Converts the priors from relative Pose3 priors to relative Point3 measurements in world frame. @@ -243,10 +241,7 @@ def compute_inliers( inlier_w_iUj_dict_tracks = {} inlier_cameras: Set[int] = set() for i1, i2 in w_i2Ui1_dict: - if ( - C(i2), - C(i1), - ) in inliers: # there is a flip in indices from w_i2Ui1_dict to inliers. + if (C(i2), C(i1)) in inliers: # there is a flip in indices from w_i2Ui1_dict to inliers. inlier_w_i2Ui1_dict[(i1, i2)] = w_i2Ui1_dict[(i1, i2)] inlier_cameras.add(i1) inlier_cameras.add(i2) @@ -400,9 +395,7 @@ def __run_averaging( List of camera translations in world frame, with as many entries as the number of images. """ logger.info( - "Using %d track measurements and %d camera measurements", - len(w_i2Ui1_dict_tracks), - len(w_i2Ui1_dict), + "Using %d track measurements and %d camera measurements", len(w_i2Ui1_dict_tracks), len(w_i2Ui1_dict) ) noise_model = gtsam.noiseModel.Isotropic.Sigma(NOISE_MODEL_DIMENSION, NOISE_MODEL_SIGMA) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 77a7acce8..46eede3df 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -135,7 +135,7 @@ def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration def _between_factors( self, relative_pose_priors: Dict[Tuple[int, int], PosePrior], - cameras_to_model: List[int], + cameras_to_model: List[int] ) -> NonlinearFactorGraph: """Generate BetweenFactors on relative poses for pose variables.""" graph = NonlinearFactorGraph() @@ -183,7 +183,7 @@ def __calibration_priors( self, initial_data: GtsfmData, cameras_to_model: List[int], - is_fisheye_calibration: bool, + is_fisheye_calibration: bool ) -> NonlinearFactorGraph: """Generate prior factors on calibration parameters of the cameras.""" graph = NonlinearFactorGraph() @@ -197,7 +197,7 @@ def __calibration_priors( initial_data.get_camera(cameras_to_model[0]).calibration(), gtsam.noiseModel.Isotropic.Sigma( calibration_prior_factor_dof, - self._calibration_prior_noise_sigma, + self._calibration_prior_noise_sigma ), ) ) @@ -209,7 +209,7 @@ def __calibration_priors( initial_data.get_camera(i).calibration(), gtsam.noiseModel.Isotropic.Sigma( calibration_prior_factor_dof, - self._calibration_prior_noise_sigma, + self._calibration_prior_noise_sigma ), ) ) @@ -226,7 +226,7 @@ def __construct_factor_graph( """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors.""" is_fisheye_calibration = isinstance( initial_data.get_camera(cameras_to_model[0]), - PinholeCameraCal3Fisheye, + PinholeCameraCal3Fisheye ) graph = NonlinearFactorGraph() @@ -241,7 +241,7 @@ def __construct_factor_graph( graph.push_back( self._between_factors( relative_pose_priors=relative_pose_priors, - cameras_to_model=cameras_to_model, + cameras_to_model=cameras_to_model ) ) graph.push_back( @@ -316,7 +316,7 @@ def run_ba_step( relative_pose_priors: Dict[Tuple[int, int], PosePrior], reproj_error_thresh: Optional[float], verbose: bool = True, - ): + ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) graph = self.__construct_factor_graph( cameras_to_model=cameras_to_model, @@ -434,13 +434,13 @@ def evaluate( ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics( gt_wTi_list=poses_gt, ba_output=aligned_filtered_data, - save_dir=save_dir, + save_dir=save_dir ) ba_metrics.extend(metrics_group=ba_pose_error_metrics) output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras( tracks=aligned_filtered_data.get_tracks(), - cameras_gt=cameras_gt, + cameras_gt=cameras_gt ) output_tracks_exit_codes_distribution = Counter(output_tracks_exit_codes) @@ -453,11 +453,11 @@ def evaluate( logger.info( "[Result] Mean track length %.3f", - np.mean(aligned_filtered_data.get_track_lengths()), + np.mean(aligned_filtered_data.get_track_lengths()) ) logger.info( "[Result] Median track length %.3f", - np.median(aligned_filtered_data.get_track_lengths()), + np.median(aligned_filtered_data.get_track_lengths()) ) aligned_filtered_data.log_scene_reprojection_error_stats() @@ -472,6 +472,7 @@ def _run_ba_instrumented( save_dir: Optional[str] = None, verbose: bool = True, ): + """TODO""" logger.info( "Input: %d tracks on %d cameras", initial_data.number_tracks(), From 848fdb27c5be88e12e9ea8d85c255ed89d8e1c93 Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 16:03:01 -0400 Subject: [PATCH 06/30] fix formatting --- .../rotation/rotation_averaging_base.py | 19 ++++---- .../averaging/translation/averaging_1dsfm.py | 9 ++-- gtsfm/bundle/bundle_adjustment.py | 44 +++++-------------- 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 30ca629ed..4d9bce196 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -62,13 +62,13 @@ def _run_rotation_averaging_base( i1Ti2_priors: Dict[Tuple[int, int], PosePrior], wTi_gt: List[Optional[Pose3]], ) -> Tuple[List[Optional[Rot3]], GtsfmMetricsGroup]: - """Run rotation averaging and computes metrics. + """Runs rotation averaging and computes metrics. Args: - num_images: number of poses. - i2Ri1_dict: relative rotations as dictionary (i1, i2): i2Ri1. - i1Ti2_priors: priors on relative poses as dictionary(i1, i2): PosePrior on i1Ti2. - wTi_gt: ground truth global rotations to compare against. + num_images: Number of poses. + i2Ri1_dict: Relative rotations as dictionary (i1, i2): i2Ri1. + i1Ti2_priors: Priors on relative poses as dictionary(i1, i2): PosePrior on i1Ti2. + wTi_gt: Ground truth global rotations to compare against. Returns: Global rotations for each camera pose, i.e. wRi, as a list. The number of entries in the list is @@ -86,13 +86,14 @@ def _run_rotation_averaging_base( return wRis, metrics def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup: - """Evaluate the global rotations computed by the rotation averaging implementation. + """Evaluates the global rotations computed by the rotation averaging implementation. Args: - wRi_computed: list of global rotations computed. - wTi_gt: ground truth global rotations to compare against. + wRi_computed: List of global rotations computed. + wTi_gt: Ground truth global rotations to compare against. + Raises: - ValueError: if the length of the computed and GT list differ. + ValueError: If the length of the computed and GT list differ. Returns: Metrics on global rotations. diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index 4da039b7c..a7cec20a0 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -10,10 +10,11 @@ Authors: Jing Wu, Ayush Baid, Akshay Krishnan """ +import time from collections import defaultdict from enum import Enum from typing import DefaultDict, Dict, List, Optional, Set, Tuple -import time + import gtsam import numpy as np from gtsam import ( @@ -476,7 +477,7 @@ def run_translation_averaging( w_i2Ui1_dict_tracks = {} inlier_computation_start_time = time.time() - (w_i2Ui1_dict_inliers, w_i2Ui1_dict_tracks_inliers, inlier_cameras) = self.compute_inliers( + w_i2Ui1_dict_inliers, w_i2Ui1_dict_tracks_inliers, inlier_cameras = self.compute_inliers( w_i2Ui1_dict, w_i2Ui1_dict_tracks ) inlier_computation_time = time.time() - inlier_computation_start_time @@ -559,12 +560,12 @@ def compute_metrics( ) measured_gt_i2Ui1_dict = {} - for i1, i2 in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs): + for (i1, i2) in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs): measured_gt_i2Ui1_dict[(i1, i2)] = gt_i2Ui1_dict[(i1, i2)] # Compute estimated poses after the averaging step and align them to ground truth. wTi_list: List[Optional[Pose3]] = [] - for wRi, wti in zip(wRi_list, wti_list): + for (wRi, wti) in zip(wRi_list, wti_list): if wRi is None or wti is None: wTi_list.append(None) else: diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 46eede3df..d7cd43401 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -133,9 +133,7 @@ def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration return graph def _between_factors( - self, - relative_pose_priors: Dict[Tuple[int, int], PosePrior], - cameras_to_model: List[int] + self, relative_pose_priors: Dict[Tuple[int, int], PosePrior], cameras_to_model: List[int] ) -> NonlinearFactorGraph: """Generate BetweenFactors on relative poses for pose variables.""" graph = NonlinearFactorGraph() @@ -180,10 +178,7 @@ def __pose_priors( return graph def __calibration_priors( - self, - initial_data: GtsfmData, - cameras_to_model: List[int], - is_fisheye_calibration: bool + self, initial_data: GtsfmData, cameras_to_model: List[int], is_fisheye_calibration: bool ) -> NonlinearFactorGraph: """Generate prior factors on calibration parameters of the cameras.""" graph = NonlinearFactorGraph() @@ -195,10 +190,7 @@ def __calibration_priors( calibration_prior_factor_class( K(self.__map_to_calibration_variable(cameras_to_model[0])), initial_data.get_camera(cameras_to_model[0]).calibration(), - gtsam.noiseModel.Isotropic.Sigma( - calibration_prior_factor_dof, - self._calibration_prior_noise_sigma - ), + gtsam.noiseModel.Isotropic.Sigma(calibration_prior_factor_dof, self._calibration_prior_noise_sigma), ) ) else: @@ -208,8 +200,7 @@ def __calibration_priors( K(self.__map_to_calibration_variable(i)), initial_data.get_camera(i).calibration(), gtsam.noiseModel.Isotropic.Sigma( - calibration_prior_factor_dof, - self._calibration_prior_noise_sigma + calibration_prior_factor_dof, self._calibration_prior_noise_sigma ), ) ) @@ -224,10 +215,7 @@ def __construct_factor_graph( relative_pose_priors: Dict[Tuple[int, int], PosePrior], ) -> NonlinearFactorGraph: """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors.""" - is_fisheye_calibration = isinstance( - initial_data.get_camera(cameras_to_model[0]), - PinholeCameraCal3Fisheye - ) + is_fisheye_calibration = isinstance(initial_data.get_camera(cameras_to_model[0]), PinholeCameraCal3Fisheye) graph = NonlinearFactorGraph() @@ -239,10 +227,7 @@ def __construct_factor_graph( ) ) graph.push_back( - self._between_factors( - relative_pose_priors=relative_pose_priors, - cameras_to_model=cameras_to_model - ) + self._between_factors(relative_pose_priors=relative_pose_priors, cameras_to_model=cameras_to_model) ) graph.push_back( self.__pose_priors( @@ -432,15 +417,12 @@ def evaluate( # align the sparse multi-view estimate after BA to the ground truth pose graph. aligned_filtered_data = filtered_data.align_via_Sim3_to_poses(wTi_list_ref=poses_gt) ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics( - gt_wTi_list=poses_gt, - ba_output=aligned_filtered_data, - save_dir=save_dir + gt_wTi_list=poses_gt, ba_output=aligned_filtered_data, save_dir=save_dir ) ba_metrics.extend(metrics_group=ba_pose_error_metrics) output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras( - tracks=aligned_filtered_data.get_tracks(), - cameras_gt=cameras_gt + tracks=aligned_filtered_data.get_tracks(), cameras_gt=cameras_gt ) output_tracks_exit_codes_distribution = Counter(output_tracks_exit_codes) @@ -451,14 +433,8 @@ def evaluate( ba_metrics.add_metrics(metrics_utils.get_stats_for_sfmdata(aligned_filtered_data, suffix="_filtered")) # ba_metrics.save_to_json(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json")) - logger.info( - "[Result] Mean track length %.3f", - np.mean(aligned_filtered_data.get_track_lengths()) - ) - logger.info( - "[Result] Median track length %.3f", - np.median(aligned_filtered_data.get_track_lengths()) - ) + logger.info("[Result] Mean track length %.3f", np.mean(aligned_filtered_data.get_track_lengths())) + logger.info("[Result] Median track length %.3f", np.median(aligned_filtered_data.get_track_lengths())) aligned_filtered_data.log_scene_reprojection_error_stats() return ba_metrics From 2a3670eb6ac0334b9d93574c79ee062b562ff9d3 Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 16:10:43 -0400 Subject: [PATCH 07/30] fix formatting --- gtsfm/averaging/rotation/shonan.py | 1 - .../averaging/translation/averaging_1dsfm.py | 4 ++-- gtsfm/bundle/bundle_adjustment.py | 24 ++++++------------- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/gtsfm/averaging/rotation/shonan.py b/gtsfm/averaging/rotation/shonan.py index 6e7c7158e..a6025b8f2 100644 --- a/gtsfm/averaging/rotation/shonan.py +++ b/gtsfm/averaging/rotation/shonan.py @@ -10,7 +10,6 @@ Authors: Jing Wu, Ayush Baid, John Lambert """ -import time from typing import Dict, List, Optional, Set, Tuple import gtsam diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index a7cec20a0..b2777bb9b 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -241,13 +241,13 @@ def compute_inliers( inlier_w_i2Ui1_dict = {} inlier_w_iUj_dict_tracks = {} inlier_cameras: Set[int] = set() - for i1, i2 in w_i2Ui1_dict: + for (i1, i2) in w_i2Ui1_dict: if (C(i2), C(i1)) in inliers: # there is a flip in indices from w_i2Ui1_dict to inliers. inlier_w_i2Ui1_dict[(i1, i2)] = w_i2Ui1_dict[(i1, i2)] inlier_cameras.add(i1) inlier_cameras.add(i2) - for j, i in w_iUj_dict_tracks: + for (j, i) in w_iUj_dict_tracks: # Same as above, `inliers` contains symbols that are flipped - C(i), L(j). # Only add an inlier camera-track measurements if the camera has other camera-camera inliers. if (C(i), L(j)) in inliers and i in inlier_cameras: diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index d7cd43401..57f0794be 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -241,9 +241,7 @@ def __construct_factor_graph( # Also add a prior on the position of the first landmark to fix the scale graph.push_back( gtsam.PriorFactorPoint3( - P(0), - initial_data.get_track(0).point3(), - gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1), + P(0), initial_data.get_track(0).point3(), gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1) ) ) @@ -259,10 +257,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: initial_values.insert(X(i), camera.pose()) if not self._shared_calib or loop_idx == 0: # add only one value if calibrations are shared - initial_values.insert( - K(self.__map_to_calibration_variable(i)), - camera.calibration(), - ) + initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) # add each SfmTrack for j in range(initial_data.number_tracks()): @@ -302,6 +297,7 @@ def run_ba_step( reproj_error_thresh: Optional[float], verbose: bool = True, ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: + """TODO""" cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) graph = self.__construct_factor_graph( cameras_to_model=cameras_to_model, @@ -377,12 +373,7 @@ def run_ba( if num_ba_steps > 1: logger.info( "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" - % ( - step + 1, - num_ba_steps, - final_error, - filtered_result.number_tracks(), - ) + % (step + 1, num_ba_steps, final_error, filtered_result.number_tracks()) ) return optimized_data, filtered_result, valid_mask @@ -404,8 +395,7 @@ def evaluate( Metrics group containing metrics for both filtered and unfiltered BA results. """ ba_metrics = GtsfmMetricsGroup( - name=METRICS_GROUP, - metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered"), + name=METRICS_GROUP, metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered") ) poses_gt = [cam.pose() if cam is not None else None for cam in cameras_gt] @@ -447,7 +437,7 @@ def _run_ba_instrumented( cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]], save_dir: Optional[str] = None, verbose: bool = True, - ): + ) -> Tuple[GtsfmData, GtsfmData, List[bool], GtsfmMetricsGroup]: """TODO""" logger.info( "Input: %d tracks on %d cameras", @@ -524,7 +514,7 @@ def create_computation_graph( absolute_pose_priors, relative_pose_priors, cameras_gt, - save_dir, + save_dir=save_dir, ) return filtered_sfm_data, metrics_graph From dbeebfec267d26b1af5e5f81dc6d4b6e13ec567c Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 16:25:28 -0400 Subject: [PATCH 08/30] fix formatting and docstrings --- gtsfm/bundle/bundle_adjustment.py | 39 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 57f0794be..ebbaafb92 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -289,7 +289,7 @@ def __cameras_to_model( return sorted(list(cameras)) - def run_ba_step( + def run_ba_at_threshold( self, initial_data: GtsfmData, absolute_pose_priors: List[Optional[PosePrior]], @@ -297,7 +297,7 @@ def run_ba_step( reproj_error_thresh: Optional[float], verbose: bool = True, ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: - """TODO""" + """Run bundle adjustment and filter the resulting tracks by reprojection error.""" cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) graph = self.__construct_factor_graph( cameras_to_model=cameras_to_model, @@ -311,31 +311,24 @@ def run_ba_step( # Print error. final_error = graph.error(result_values) if verbose: - logger.info(f"initial error: {graph.error(initial_values):.2f}") - logger.info(f"final error: {final_error:.2f}") + logger.info("initial error: %.2f", graph.error(initial_values)) + logger.info("final error: %.2f", final_error) - # Construct the results. + # Convert the `Values` results to a `GtsfmData` instance. optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib) # Filter landmarks by reprojection error. if reproj_error_thresh is not None: if verbose: - logger.info( - "[Result] Number of tracks before filtering: %d", - optimized_data.number_tracks(), - ) + logger.info("[Result] Number of tracks before filtering: %d", optimized_data.number_tracks()) filtered_result, valid_mask = optimized_data.filter_landmarks(reproj_error_thresh) if verbose: - logger.info( - "[Result] Number of tracks after filtering: %d", - filtered_result.number_tracks(), - ) + logger.info("[Result] Number of tracks after filtering: %d", filtered_result.number_tracks()) else: valid_mask = [True] * optimized_data.number_tracks() filtered_result = optimized_data - # Set intermediate result as initial condition for next step. return optimized_data, filtered_result, valid_mask, final_error def run_ba( @@ -361,7 +354,8 @@ def run_ba( """ num_ba_steps = len(self._reproj_error_thresholds) for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): - (optimized_data, filtered_result, valid_mask, final_error,) = self.run_ba_step( + # Use intermediate result as initial condition for next step. + (optimized_data, filtered_result, valid_mask, final_error) = self.run_ba_at_threshold( initial_data, absolute_pose_priors, relative_pose_priors, @@ -460,20 +454,21 @@ def _run_ba_instrumented( num_ba_steps = len(self._reproj_error_thresholds) for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): step_start_time = time.time() - (optimized_data, filtered_result, valid_mask, final_error,) = self.run_ba_step( - initial_data, - absolute_pose_priors, - relative_pose_priors, - reproj_error_thresh, - verbose, + (optimized_data, filtered_result, valid_mask, final_error) = self.run_ba_at_threshold( + initial_data=initial_data, + absolute_pose_priors=absolute_pose_priors, + relative_pose_priors=relative_pose_priors, + reproj_error_thresh=reproj_error_thresh, + verbose=verbose, ) step_times.append(time.time() - step_start_time) # Print intermediate results. if num_ba_steps > 1: logger.info( - "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" + "[BA Stage @ thresh=%.2f px %d/%d] Error: %.2f, Number of tracks: %d" % ( + reproj_error_thresh, step + 1, num_ba_steps, final_error, From 03791799d723b7f68f10456e9c115b82aa39a4e3 Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 16:47:58 -0400 Subject: [PATCH 09/30] fix missing arg and improve fn naming --- .../averaging/rotation/rotation_averaging_base.py | 4 +++- gtsfm/bundle/bundle_adjustment.py | 14 +++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 4d9bce196..31c06dbf1 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -129,6 +129,8 @@ def create_computation_graph( global rotations wrapped using dask.delayed. """ - wRis, metrics = dask.delayed(self._run_rotation_averaging_base, nout=2)(num_images, i2Ri1_graph, i1Ti2_priors) + wRis, metrics = dask.delayed(self._run_rotation_averaging_base, nout=2)( + num_images, i2Ri1_dict=i2Ri1_graph, i1Ti2_priors=i1Ti2_priors, wTi_gt=gt_wTi_list + ) return wRis, metrics diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index ebbaafb92..ec11a890f 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -423,7 +423,7 @@ def evaluate( return ba_metrics - def _run_ba_instrumented( + def _run_ba_with_profiling( self, initial_data: GtsfmData, absolute_pose_priors: List[Optional[PosePrior]], @@ -439,15 +439,11 @@ def _run_ba_instrumented( len(initial_data.get_valid_camera_indices()), ) if initial_data.number_tracks() == 0 or len(initial_data.get_valid_camera_indices()) == 0: - # no cameras or tracks to optimize, so bundle adjustment is not possible + # No cameras or tracks to optimize, so bundle adjustment is not possible. logger.error( "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." ) - return ( - initial_data, - initial_data, - [False] * initial_data.number_tracks(), - ) + return initial_data, initial_data, [False] * initial_data.number_tracks() step_times = [] start_time = time.time() @@ -480,7 +476,7 @@ def _run_ba_instrumented( metrics = self.evaluate(optimized_data, filtered_result, cameras_gt, save_dir) for i, step_time in enumerate(step_times): metrics.add_metric(GtsfmMetric(f"step_{i}_run_duration_sec", step_time)) - metrics.add_metric(GtsfmMetric(f"total_run_duration_sec", total_time)) + metrics.add_metric(GtsfmMetric("total_run_duration_sec", total_time)) return optimized_data, filtered_result, valid_mask, metrics @@ -504,7 +500,7 @@ def create_computation_graph( Metrics group for BA results, wrapped up using dask.delayed """ - _, filtered_sfm_data, _, metrics_graph = dask.delayed(self._run_ba_instrumented, nout=4)( + _, filtered_sfm_data, _, metrics_graph = dask.delayed(self._run_ba_with_profiling, nout=4)( sfm_data_graph, absolute_pose_priors, relative_pose_priors, From 9733be9aca508f56b7b8f272fe153fab9f44935c Mon Sep 17 00:00:00 2001 From: senselessdev1 Date: Sat, 12 Aug 2023 16:53:28 -0400 Subject: [PATCH 10/30] reformat --- gtsfm/bundle/bundle_adjustment.py | 18 +++++++++--------- gtsfm/two_view_estimator.py | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index ec11a890f..cb4a72ec7 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -115,11 +115,11 @@ def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack - # retrieve the SfmMeasurement objects + # Retrieve the SfmMeasurement objects. for m_idx in range(track.numberMeasurements()): - # i represents the camera index, and uv is the 2d measurement + # `i` represents the camera index, and `uv` is the 2d measurement i, uv = track.measurement(m_idx) - # note use of shorthand symbols C and P + # Note use of shorthand symbols `X` and `P`. graph.push_back( sfm_factor_class( uv, @@ -219,7 +219,7 @@ def __construct_factor_graph( graph = NonlinearFactorGraph() - # Create a factor graph + # Create a factor graph. graph.push_back( self.__reprojection_factors( initial_data=initial_data, @@ -251,7 +251,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: """Initialize all the variables in the factor graph.""" initial_values = gtsam.Values() - # add each camera + # Add each camera. for loop_idx, i in enumerate(initial_data.get_valid_camera_indices()): camera = initial_data.get_camera(i) initial_values.insert(X(i), camera.pose()) @@ -259,7 +259,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: # add only one value if calibrations are shared initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) - # add each SfmTrack + # Add each SfmTrack. for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) initial_values.insert(P(j), track.point3()) @@ -491,9 +491,9 @@ def create_computation_graph( """Create the computation graph for performing bundle adjustment. Args: - sfm_data_graph: an GtsfmData object wrapped up using dask.delayed - absolute_pose_priors: priors on the poses of the cameras (not delayed). - relative_pose_priors: priors on poses between cameras (not delayed). + sfm_data_graph: An GtsfmData object wrapped up using dask.delayed. + absolute_pose_priors: Priors on the poses of the cameras (not delayed). + relative_pose_priors: Priors on poses between cameras (not delayed). Returns: GtsfmData aligned to GT (if provided), wrapped up using dask.delayed diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index ecf6bc64b..2d5e02a50 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -154,6 +154,7 @@ def bundle_adjust( i2Ri1_initial: The relative rotation to be used as initial rotation between cameras. i2Ui1_initial: The relative unit direction, to be used to initialize initial translation between cameras. i2Ti1_prior: Prior on the relative pose for cameras (i1, i2). + Returns: Optimized relative rotation i2Ri1. Optimized unit translation i2Ui1. From 6e53dfe9433ef05b9c6d17c169d982dc554d3512 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sun, 13 Aug 2023 01:32:05 +0000 Subject: [PATCH 11/30] add frontend timing metrics --- gtsfm/runner/gtsfm_runner_base.py | 47 +++++++++++++-- gtsfm/scene_optimizer.py | 99 +++---------------------------- gtsfm/two_view_estimator.py | 55 ++++++++++++++++- gtsfm/utils/metrics.py | 3 + 4 files changed, 107 insertions(+), 97 deletions(-) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index e9cf89b0a..9a902f734 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -5,7 +5,7 @@ import time from abc import abstractmethod, abstractproperty from pathlib import Path -from typing import Any, Dict, Tuple +from typing import Any, Dict, List, Tuple import dask import hydra @@ -15,8 +15,12 @@ from hydra.utils import instantiate from omegaconf import OmegaConf +import gtsfm.evaluation.metrics_report as metrics_report import gtsfm.utils.logger as logger_utils +import gtsfm.utils.metrics as metrics_utils from gtsfm.common.gtsfm_data import GtsfmData +from gtsfm import two_view_estimator +from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup from gtsfm.frontend.correspondence_generator.image_correspondence_generator import ImageCorrespondenceGenerator from gtsfm.loader.loader_base import LoaderBase from gtsfm.retriever.retriever_base import ImageMatchingRegime @@ -27,6 +31,7 @@ logger = logger_utils.get_logger() DEFAULT_OUTPUT_ROOT = Path(__file__).resolve().parent.parent.parent +REACT_METRICS_PATH = DEFAULT_OUTPUT_ROOT / "rtf_vis_tool" / "src" / "result_metrics" class GtsfmRunnerBase: @@ -257,13 +262,11 @@ def run(self) -> GtsfmData: self.loader, plots_output_dir=self.scene_optimizer._plot_base_path ) retriever_metrics = self.scene_optimizer.retriever.evaluate(self.loader, image_pair_indices) - retriever_metrics.save_to_json( - os.path.join(self.parsed_args.output_root, "result_metrics", "retriever_metrics" + ".json") - ) intrinsics = self.loader.get_all_intrinsics() with performance_report(filename="correspondence-generator-dask-report.html"): + correspondence_generation_start_time = time.time() ( keypoints_list, putative_corr_idxs_dict, @@ -272,7 +275,9 @@ def run(self) -> GtsfmData: self.loader.get_all_images_as_futures(client), image_pair_indices, ) + correspondence_generation_duration_sec = time.time() - correspondence_generation_start_time + two_view_estimation_start_time = time.time() two_view_results_dict = run_two_view_estimator_as_futures( client, self.scene_optimizer.two_view_estimator, @@ -283,10 +288,19 @@ def run(self) -> GtsfmData: self.loader.get_gt_cameras(), gt_scene_mesh=self.loader.get_gt_scene_trimesh(), ) + two_view_estimation_duration_sec = time.time() - two_view_estimation_start_time i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict = unzip_two_view_results(two_view_results_dict) + two_view_metrics = two_view_estimator.aggregate_frontend_metrics( + two_view_reports_dict=two_view_reports_dict, + angular_err_threshold_deg=self.scene_optimizer._pose_angular_error_thresh, + metric_group_name="verifier_summary_{}".format(two_view_estimator.POST_ISP_REPORT_TAG), + ) + two_view_metrics.add_metric(GtsfmMetric('total_correspondence_generation_duration_sec', correspondence_generation_duration_sec)) + two_view_metrics.add_metric(GtsfmMetric('total_two_view_estimation_duration_sec', two_view_estimation_duration_sec)) + all_metrics_groups = [retriever_metrics, two_view_metrics] - delayed_sfm_result, delayed_io = self.scene_optimizer.create_computation_graph( + delayed_sfm_result, delayed_io, delayed_mvo_metrics_groups = self.scene_optimizer.create_computation_graph( keypoints_list=keypoints_list, i2Ri1_dict=i2Ri1_dict, i2Ui1_dict=i2Ui1_dict, @@ -303,9 +317,12 @@ def run(self) -> GtsfmData: ) with performance_report(filename="scene-optimizer-dask-report.html"): - sfm_result, *io = dask.compute(delayed_sfm_result, *delayed_io) + sfm_result, *other_results = dask.compute(delayed_sfm_result, *delayed_io, *delayed_mvo_metrics_groups) + mvo_metrics_groups = [x for x in other_results if isinstance(x, GtsfmMetricsGroup)] assert isinstance(sfm_result, GtsfmData) + all_metrics_groups.extend(mvo_metrics_groups) + save_metrics_reports(all_metrics_groups, os.path.join(self.scene_optimizer.output_root, "result_metrics")) end_time = time.time() duration_sec = end_time - start_time @@ -340,3 +357,21 @@ def unzip_two_view_results( two_view_reports_dict[(i1, i2)] = two_view_output[5] return i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict + + +def save_metrics_reports(metrics_group_list: List[GtsfmMetricsGroup], metrics_path: str) -> None: + """Saves metrics to JSON and HTML report. + + Args: + metrics_graph: List of GtsfmMetricsGroup from different modules wrapped as Delayed. + metrics_path: Path to directory where computed metrics will be saved. + + Returns: + List of delayed objects after saving metrics. + """ + + # Save metrics to JSON + metrics_utils.save_metrics_as_json(metrics_group_list, metrics_path) + metrics_utils.save_metrics_as_json(metrics_group_list, str(REACT_METRICS_PATH)) + + metrics_report.generate_metrics_report_html(metrics_group_list, os.path.join(metrics_path, "gtsfm_metrics_report.html"), None) diff --git a/gtsfm/scene_optimizer.py b/gtsfm/scene_optimizer.py index be665a92c..69f6cb385 100644 --- a/gtsfm/scene_optimizer.py +++ b/gtsfm/scene_optimizer.py @@ -16,12 +16,10 @@ from trimesh import Trimesh import gtsfm.common.types as gtsfm_types -import gtsfm.evaluation.metrics_report as metrics_report import gtsfm.two_view_estimator as two_view_estimator import gtsfm.utils.ellipsoid as ellipsoid_utils import gtsfm.utils.io as io_utils import gtsfm.utils.logger as logger_utils -import gtsfm.utils.metrics as metrics_utils import gtsfm.utils.viz as viz_utils from gtsfm.common.gtsfm_data import GtsfmData from gtsfm.common.image import Image @@ -54,9 +52,6 @@ pil_logger = logging.getLogger("PIL") pil_logger.setLevel(logging.INFO) -# number of digits (significant figures) to include in each entry of error metrics -PRINT_NUM_SIG_FIGS = 2 - class SceneOptimizer: """Wrapper combining different modules to run the whole pipeline on a @@ -144,7 +139,7 @@ def create_computation_graph( cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]], gt_wTi_list: List[Optional[Pose3]], gt_scene_mesh: Optional[Trimesh] = None, - ) -> Tuple[Delayed, List[Delayed]]: + ) -> Tuple[Delayed, List[Delayed], List[Delayed]]: """The SceneOptimizer plate calls the FeatureExtractor and TwoViewEstimator plates several times.""" logger.info(f"Results, plots, and metrics will be saved at {self.output_root}") @@ -177,6 +172,7 @@ def create_computation_graph( # Persist all front-end metrics and their summaries. # TODO(akshay-krishnan): this delays saving the frontend reports until MVO has completed, not ideal. metrics_graph_list: List[Delayed] = [] + save_retrieval_metrics = self.retriever._matching_regime in [ImageMatchingRegime.RETRIEVAL, ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL] annotation = dask.annotate(workers=self._output_worker) if self._output_worker else dask.annotate() with annotation: delayed_results.append( @@ -184,18 +180,11 @@ def create_computation_graph( two_view_reports, images, filename="two_view_report_{}.json".format(POST_ISP_REPORT_TAG), - matching_regime=self.retriever._matching_regime, + save_retrieval_metrics=save_retrieval_metrics, metrics_path=self._metrics_path, plot_base_path=self._plot_base_path, ) ) - metrics_graph_list.append( - dask.delayed(two_view_estimator.aggregate_frontend_metrics)( - two_view_reports, - self._pose_angular_error_thresh, - metric_group_name="verifier_summary_{}".format(POST_ISP_REPORT_TAG), - ) - ) # TODO(Ayush): pass only image name instead of the whole image. And delete images from memory. delayed_results.append( @@ -203,7 +192,7 @@ def create_computation_graph( two_view_reports_post_viewgraph_estimator, images, filename="two_view_report_{}.json".format(VIEWGRAPH_REPORT_TAG), - matching_regime=self.retriever._matching_regime, + save_retrieval_metrics=save_retrieval_metrics, metrics_path=self._metrics_path, plot_base_path=self._plot_base_path, ) @@ -276,11 +265,9 @@ def create_computation_graph( # Save metrics to JSON and generate HTML report. annotation = dask.annotate(workers=self._output_worker) if self._output_worker else dask.annotate() - with annotation: - delayed_results.extend(save_metrics_reports(metrics_graph_list, metrics_path=self._metrics_path)) # return the entry with just the sfm result - return ba_output_graph, delayed_results + return ba_output_graph, delayed_results, metrics_graph_list def get_image_dictionary(image_list: List[Image]) -> Dict[int, Image]: @@ -323,7 +310,7 @@ def save_visualizations( plot_ba_input_path: Path, plot_results_path: Path, ) -> List[Delayed]: - """Save SfmData before and after bundle adjustment and camera poses for visualization. + """Save SfmData before and after bundle adjustment and camera poses for visualization.i Accepts delayed GtsfmData before and after bundle adjustment, along with GT poses, saves them and returns a delayed object. @@ -422,44 +409,13 @@ def save_gtsfm_data( return saving_graph_list - -def save_metrics_reports(metrics_graph_list: List[Delayed], metrics_path: Path) -> List[Delayed]: - """Saves metrics to JSON and HTML report. - - Args: - metrics_graph: List of GtsfmMetricsGroup from different modules wrapped as Delayed. - metrics_path: Path to directory where computed metrics will be saved. - - Returns: - List of delayed objects after saving metrics. - """ - save_metrics_graph_list: List[Delayed] = [] - - if len(metrics_graph_list) == 0: - return save_metrics_graph_list - - # Save metrics to JSON - save_metrics_graph_list.append(dask.delayed(metrics_utils.save_metrics_as_json)(metrics_graph_list, metrics_path)) - save_metrics_graph_list.append( - dask.delayed(metrics_utils.save_metrics_as_json)(metrics_graph_list, REACT_METRICS_PATH) - ) - save_metrics_graph_list.append( - dask.delayed(metrics_report.generate_metrics_report_html)( - metrics_graph_list, - os.path.join(metrics_path, "gtsfm_metrics_report.html"), - None, - ) - ) - return save_metrics_graph_list - - def save_full_frontend_metrics( two_view_report_dict: Dict[Tuple[int, int], TwoViewEstimationReport], images: List[Image], filename: str, - matching_regime: ImageMatchingRegime, metrics_path: Path, plot_base_path: Path, + save_retrieval_metrics: bool = True, ) -> None: """Converts the TwoViewEstimationReports for all image pairs to a Dict and saves it as JSON. @@ -471,40 +427,7 @@ def save_full_frontend_metrics( metrics_path: Path to directory where metrics will be saved. plot_base_path: Path to directory where plots will be saved. """ - metrics_list = [] - - round_fn = lambda x: round(x, PRINT_NUM_SIG_FIGS) if x else None - - for (i1, i2), report in two_view_report_dict.items(): - # Note: if GT is unknown, then R_error_deg, U_error_deg, and inlier_ratio_gt_model will be None - metrics_list.append( - { - "i1": int(i1), - "i2": int(i2), - "i1_filename": images[i1].file_name, - "i2_filename": images[i2].file_name, - "rotation_angular_error": round_fn(report.R_error_deg), - "translation_angular_error": round_fn(report.U_error_deg), - "num_inliers_gt_model": int(report.num_inliers_gt_model) - if report.num_inliers_gt_model is not None - else None, - "inlier_ratio_gt_model": round_fn(report.inlier_ratio_gt_model), - "inlier_avg_reproj_error_gt_model": round_fn( - np.nanmean(report.reproj_error_gt_model[report.v_corr_idxs_inlier_mask_gt]) - ) - if report.reproj_error_gt_model is not None and report.v_corr_idxs_inlier_mask_gt is not None - else None, - "outlier_avg_reproj_error_gt_model": round_fn( - np.nanmean(report.reproj_error_gt_model[np.logical_not(report.v_corr_idxs_inlier_mask_gt)]) - ) - if report.reproj_error_gt_model is not None and report.v_corr_idxs_inlier_mask_gt is not None - else None, - "inlier_ratio_est_model": round_fn(report.inlier_ratio_est_model), - "num_inliers_est_model": int(report.num_inliers_est_model) - if report.num_inliers_est_model is not None - else None, - } - ) + metrics_list = two_view_estimator.get_two_view_reports_summary(two_view_report_dict, images) io_utils.save_json_file(os.path.join(metrics_path, filename), metrics_list) @@ -513,12 +436,8 @@ def save_full_frontend_metrics( # All retreival metrics need GT, no need to save them if GT is not available. gt_available = any([report.R_error_deg is not None for report in two_view_report_dict.values()]) - if not gt_available: - return - if matching_regime not in [ImageMatchingRegime.RETRIEVAL, ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL]: - return - if "VIEWGRAPH_2VIEW_REPORT" in filename: + if save_retrieval_metrics and "VIEWGRAPH_2VIEW_REPORT" in filename and gt_available: # must come after two-view report file is written to disk in the Dask dependency graph. _save_retrieval_two_view_metrics(metrics_path, plot_base_path) diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index 2d5e02a50..f20bae9d6 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -17,6 +17,7 @@ import gtsfm.utils.metrics as metric_utils from gtsfm.bundle.two_view_ba import TwoViewBundleAdjustment from gtsfm.common.gtsfm_data import GtsfmData +from gtsfm.common.image import Image from gtsfm.common.keypoints import Keypoints from gtsfm.common.pose_prior import PosePrior from gtsfm.common.sfm_track import SfmMeasurement, SfmTrack2d @@ -425,7 +426,7 @@ def aggregate_frontend_metrics( two_view_reports_dict: Dict[Tuple[int, int], TwoViewEstimationReport], angular_err_threshold_deg: float, metric_group_name: str, -) -> None: +) -> GtsfmMetricsGroup: """Aggregate the front-end metrics to log summary statistics. We define "pose error" as the maximum of the angular errors in rotation and translation, per: @@ -585,3 +586,55 @@ def apply_two_view_estimator( two_view_output_dict = client.gather(two_view_output_futures) return two_view_output_dict + + +def get_two_view_reports_summary( + two_view_report_dict: Dict[Tuple[int, int], TwoViewEstimationReport], + images: List[Image], +) -> List[Dict[str, Any]]: + """Converts the TwoViewEstimationReports for all image pairs to a Dict and saves it as JSON. + + Args: + two_view_report_dict: Front-end metrics for pairs of images. + images: List of all images for this scene, in order of image/frame index. + + Returns: + List of dictionaries, where each dictionary contains the metrics for an image pair. + """ + def round_fn(x): + return round(x, 2) if x else None + + metrics_list = [] + + for (i1, i2), report in two_view_report_dict.items(): + # Note: if GT is unknown, then R_error_deg, U_error_deg, and inlier_ratio_gt_model will be None + metrics_list.append( + { + "i1": int(i1), + "i2": int(i2), + "i1_filename": images[i1].file_name, + "i2_filename": images[i2].file_name, + "rotation_angular_error": round_fn(report.R_error_deg), + "translation_angular_error": round_fn(report.U_error_deg), + "num_inliers_gt_model": int(report.num_inliers_gt_model) + if report.num_inliers_gt_model is not None + else None, + "inlier_ratio_gt_model": round_fn(report.inlier_ratio_gt_model), + "inlier_avg_reproj_error_gt_model": round_fn( + np.nanmean(report.reproj_error_gt_model[report.v_corr_idxs_inlier_mask_gt]) + ) + if report.reproj_error_gt_model is not None and report.v_corr_idxs_inlier_mask_gt is not None + else None, + "outlier_avg_reproj_error_gt_model": round_fn( + np.nanmean(report.reproj_error_gt_model[np.logical_not(report.v_corr_idxs_inlier_mask_gt)]) + ) + if report.reproj_error_gt_model is not None and report.v_corr_idxs_inlier_mask_gt is not None + else None, + "inlier_ratio_est_model": round_fn(report.inlier_ratio_est_model), + "num_inliers_est_model": int(report.num_inliers_est_model) + if report.num_inliers_est_model is not None + else None, + } + ) + return metrics_list + \ No newline at end of file diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index 5c08c2177..42702f8ae 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -16,6 +16,7 @@ from trimesh import Trimesh import gtsfm.utils.geometry_comparisons as comp_utils +import gtsfm.utils.io as io_utils import gtsfm.utils.logger as logger_utils import gtsfm.utils.verification as verification_utils from gtsfm.common.gtsfm_data import GtsfmData @@ -34,6 +35,8 @@ logger = logger_utils.get_logger() +# number of digits (significant figures) to include in each entry of error metrics +PRINT_NUM_SIG_FIGS = 2 def compute_correspondence_metrics( keypoints_i1: Keypoints, From 2c6f27107a8def34e890a47db272fb7a0e259160 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sun, 13 Aug 2023 01:35:25 +0000 Subject: [PATCH 12/30] remove PRINT_NUM_SIG (usage was removed earlier) --- gtsfm/common/gtsfm_data.py | 2 -- gtsfm/utils/metrics.py | 3 --- 2 files changed, 5 deletions(-) diff --git a/gtsfm/common/gtsfm_data.py b/gtsfm/common/gtsfm_data.py index 23008f6d8..ea0c09fd8 100644 --- a/gtsfm/common/gtsfm_data.py +++ b/gtsfm/common/gtsfm_data.py @@ -19,8 +19,6 @@ logger = logging.getLogger(__name__) EQUALITY_TOLERANCE = 1e-5 -PRINT_NUM_SIG_FIGS = 2 - class GtsfmData: """Class containing cameras and tracks, essentially describing the complete 3D scene. diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index 42702f8ae..dc9ef3559 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -35,9 +35,6 @@ logger = logger_utils.get_logger() -# number of digits (significant figures) to include in each entry of error metrics -PRINT_NUM_SIG_FIGS = 2 - def compute_correspondence_metrics( keypoints_i1: Keypoints, keypoints_i2: Keypoints, From 963cc64e8afb4214bd85cc66a208f729f1700d14 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sun, 13 Aug 2023 01:40:49 +0000 Subject: [PATCH 13/30] some formatting --- gtsfm/evaluation/metrics_report.py | 4 ++-- gtsfm/runner/gtsfm_runner_base.py | 14 ++++++++++---- gtsfm/scene_optimizer.py | 6 +++++- gtsfm/two_view_estimator.py | 6 +++--- gtsfm/utils/io.py | 3 +-- gtsfm/utils/metrics.py | 11 ++++++----- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/gtsfm/evaluation/metrics_report.py b/gtsfm/evaluation/metrics_report.py index 2b8b128c0..881fd0ec2 100644 --- a/gtsfm/evaluation/metrics_report.py +++ b/gtsfm/evaluation/metrics_report.py @@ -8,7 +8,7 @@ Authors: Akshay Krishnan, Jon Womack """ from collections import defaultdict -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np import plotly.graph_objects as go @@ -372,7 +372,7 @@ def get_html_header() -> str: def generate_metrics_report_html( metrics_groups: List[GtsfmMetricsGroup], html_path: str, - other_pipelines_metrics_groups: Dict[str, List[GtsfmMetricsGroup]], + other_pipelines_metrics_groups: Optional[Dict[str, List[GtsfmMetricsGroup]]], ) -> None: """Generates a report for metrics groups with plots and tables and saves it to HTML. diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index 9a902f734..930d5d4be 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -296,8 +296,12 @@ def run(self) -> GtsfmData: angular_err_threshold_deg=self.scene_optimizer._pose_angular_error_thresh, metric_group_name="verifier_summary_{}".format(two_view_estimator.POST_ISP_REPORT_TAG), ) - two_view_metrics.add_metric(GtsfmMetric('total_correspondence_generation_duration_sec', correspondence_generation_duration_sec)) - two_view_metrics.add_metric(GtsfmMetric('total_two_view_estimation_duration_sec', two_view_estimation_duration_sec)) + two_view_metrics.add_metric( + GtsfmMetric("total_correspondence_generation_duration_sec", correspondence_generation_duration_sec) + ) + two_view_metrics.add_metric( + GtsfmMetric("total_two_view_estimation_duration_sec", two_view_estimation_duration_sec) + ) all_metrics_groups = [retriever_metrics, two_view_metrics] delayed_sfm_result, delayed_io, delayed_mvo_metrics_groups = self.scene_optimizer.create_computation_graph( @@ -336,7 +340,7 @@ def unzip_two_view_results( ) -> Tuple[ Dict[Tuple[int, int], Rot3], Dict[Tuple[int, int], Unit3], - Dict[Tuple[int, int], np.ndarray], + Dict[Tuple[int, int], np.ndarray],ptwo Dict[Tuple[int, int], TwoViewEstimationReport], ]: """Unzip the tuple TWO_VIEW_OUTPUT into 1 dictionary for 1 element in the tuple.""" @@ -374,4 +378,6 @@ def save_metrics_reports(metrics_group_list: List[GtsfmMetricsGroup], metrics_pa metrics_utils.save_metrics_as_json(metrics_group_list, metrics_path) metrics_utils.save_metrics_as_json(metrics_group_list, str(REACT_METRICS_PATH)) - metrics_report.generate_metrics_report_html(metrics_group_list, os.path.join(metrics_path, "gtsfm_metrics_report.html"), None) + metrics_report.generate_metrics_report_html( + metrics_group_list, os.path.join(metrics_path, "gtsfm_metrics_report.html"), None + ) diff --git a/gtsfm/scene_optimizer.py b/gtsfm/scene_optimizer.py index 69f6cb385..0f378762b 100644 --- a/gtsfm/scene_optimizer.py +++ b/gtsfm/scene_optimizer.py @@ -172,7 +172,10 @@ def create_computation_graph( # Persist all front-end metrics and their summaries. # TODO(akshay-krishnan): this delays saving the frontend reports until MVO has completed, not ideal. metrics_graph_list: List[Delayed] = [] - save_retrieval_metrics = self.retriever._matching_regime in [ImageMatchingRegime.RETRIEVAL, ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL] + save_retrieval_metrics = self.retriever._matching_regime in [ + ImageMatchingRegime.RETRIEVAL, + ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL, + ] annotation = dask.annotate(workers=self._output_worker) if self._output_worker else dask.annotate() with annotation: delayed_results.append( @@ -409,6 +412,7 @@ def save_gtsfm_data( return saving_graph_list + def save_full_frontend_metrics( two_view_report_dict: Dict[Tuple[int, int], TwoViewEstimationReport], images: List[Image], diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index f20bae9d6..3ed3dae0a 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -597,10 +597,11 @@ def get_two_view_reports_summary( Args: two_view_report_dict: Front-end metrics for pairs of images. images: List of all images for this scene, in order of image/frame index. - + Returns: List of dictionaries, where each dictionary contains the metrics for an image pair. - """ + """ + def round_fn(x): return round(x, 2) if x else None @@ -637,4 +638,3 @@ def round_fn(x): } ) return metrics_list - \ No newline at end of file diff --git a/gtsfm/utils/io.py b/gtsfm/utils/io.py index 4013dada5..1d8f31a0e 100644 --- a/gtsfm/utils/io.py +++ b/gtsfm/utils/io.py @@ -211,7 +211,7 @@ def colmap2gtsfm( sfmtracks_gtsfm = [] for point3D in points3D.values(): sfmtrack = SfmTrack(point3D.xyz) - for (image_id, point2d_idx) in zip(point3D.image_ids, point3D.point2D_idxs): + for image_id, point2d_idx in zip(point3D.image_ids, point3D.point2D_idxs): sfmtrack.addMeasurement(image_id_to_idx[image_id], images[image_id].xys[point2d_idx]) sfmtracks_gtsfm.append(sfmtrack) @@ -243,7 +243,6 @@ def read_cameras_txt(fpath: str) -> Optional[List[Cal3Bundler]]: calibrations = [] for line in lines[3:]: - cam_params = line.split() # Note that u0 is px, and v0 is py model = cam_params[1] diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index dc9ef3559..a1eaa4f05 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -35,6 +35,7 @@ logger = logger_utils.get_logger() + def compute_correspondence_metrics( keypoints_i1: Keypoints, keypoints_i2: Keypoints, @@ -225,7 +226,7 @@ def compute_rotation_angle_metric(wRi_list: List[Optional[Rot3]], gt_wRi_list: L A GtsfmMetric for the N rotation angle errors, in degrees. """ errors = [] - for (wRi, gt_wRi) in zip(wRi_list, gt_wRi_list): + for wRi, gt_wRi in zip(wRi_list, gt_wRi_list): if wRi is not None and gt_wRi is not None: errors.append(comp_utils.compute_relative_rotation_angle(wRi, gt_wRi)) else: @@ -249,7 +250,7 @@ def compute_translation_distance_metric( A statistics dict of the metrics errors in degrees. """ errors = [] - for (wti, gt_wti) in zip(wti_list, gt_wti_list): + for wti, gt_wti in zip(wti_list, gt_wti_list): if wti is not None and gt_wti is not None: errors.append(comp_utils.compute_points_distance_l2(wti, gt_wti)) return GtsfmMetric("translation_error_distance", errors) @@ -268,7 +269,7 @@ def compute_relative_translation_angle_metric( A GtsfmMetric for the relative translation angle errors, in degrees. """ angles: List[Optional[float]] = [] - for (i1, i2) in i2Ui1_dict: + for i1, i2 in i2Ui1_dict: i2Ui1 = i2Ui1_dict[(i1, i2)] angles.append(comp_utils.compute_translation_to_direction_angle(i2Ui1, wTi_list[i2], wTi_list[i1])) return GtsfmMetric("relative_translation_angle_error_deg", np.array(angles, dtype=np.float32)) @@ -386,7 +387,7 @@ def get_twoview_translation_directions(wTi_list: List[Optional[Pose3]]) -> Dict[ # check against all possible image pairs -- compute unit translation directions i2Ui1_dict = {} possible_img_pair_idxs = list(itertools.combinations(range(number_images), 2)) - for (i1, i2) in possible_img_pair_idxs: + for i1, i2 in possible_img_pair_idxs: # compute the exact relative pose if wTi_list[i1] is None or wTi_list[i2] is None: i2Ui1 = None @@ -494,7 +495,7 @@ def get_measurement_angle_errors( List of angles between the measured and ground truth translation directions. """ errors: List[float] = [] - for (i1, i2) in i1_i2_pairs: + for i1, i2 in i1_i2_pairs: if (i1, i2) in i2Ui1_measurements and (i1, i2) in gt_i2Ui1_measurements: error = comp_utils.compute_relative_unit_translation_angle( i2Ui1_measurements[(i1, i2)], gt_i2Ui1_measurements[(i1, i2)] From aa08a951447bec0e8ad9de2faceeeb1fba051055 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sun, 13 Aug 2023 05:40:04 +0000 Subject: [PATCH 14/30] fix typo --- gtsfm/runner/gtsfm_runner_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index 930d5d4be..aaa417372 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -340,7 +340,7 @@ def unzip_two_view_results( ) -> Tuple[ Dict[Tuple[int, int], Rot3], Dict[Tuple[int, int], Unit3], - Dict[Tuple[int, int], np.ndarray],ptwo + Dict[Tuple[int, int], np.ndarray], Dict[Tuple[int, int], TwoViewEstimationReport], ]: """Unzip the tuple TWO_VIEW_OUTPUT into 1 dictionary for 1 element in the tuple.""" From c42578a54c6437d94cd907abe6c2467fec497260 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Sun, 13 Aug 2023 01:47:53 -0400 Subject: [PATCH 15/30] Add metrics to measure timing for rotation averaging, translation averaging, and bundle adjustment (#694) * add timing metrics * Fix formatting * Remove formatting changes * fix formatting * fix formatting * fix formatting * fix formatting and docstrings * fix missing arg and improve fn naming * reformat * Rearrange and rename functions * improve docstrings * format code * update --------- Co-authored-by: Akshay Krishnan Co-authored-by: senselessdev1 --- .../rotation/rotation_averaging_base.py | 45 ++++- .../averaging/translation/averaging_1dsfm.py | 13 ++ gtsfm/bundle/bundle_adjustment.py | 173 +++++++++++++----- gtsfm/configs/correspondence/loftr.yaml | 4 +- gtsfm/two_view_estimator.py | 1 + gtsfm/utils/metrics.py | 6 +- 6 files changed, 184 insertions(+), 58 deletions(-) diff --git a/gtsfm/averaging/rotation/rotation_averaging_base.py b/gtsfm/averaging/rotation/rotation_averaging_base.py index 5a04968d9..31c06dbf1 100644 --- a/gtsfm/averaging/rotation/rotation_averaging_base.py +++ b/gtsfm/averaging/rotation/rotation_averaging_base.py @@ -3,6 +3,7 @@ Authors: Jing Wu, Ayush Baid """ import abc +import time from typing import Dict, List, Optional, Tuple import dask @@ -54,14 +55,45 @@ def run_rotation_averaging( underconstrained system or ill-constrained system). """ + def _run_rotation_averaging_base( + self, + num_images: int, + i2Ri1_dict: Dict[Tuple[int, int], Optional[Rot3]], + i1Ti2_priors: Dict[Tuple[int, int], PosePrior], + wTi_gt: List[Optional[Pose3]], + ) -> Tuple[List[Optional[Rot3]], GtsfmMetricsGroup]: + """Runs rotation averaging and computes metrics. + + Args: + num_images: Number of poses. + i2Ri1_dict: Relative rotations as dictionary (i1, i2): i2Ri1. + i1Ti2_priors: Priors on relative poses as dictionary(i1, i2): PosePrior on i1Ti2. + wTi_gt: Ground truth global rotations to compare against. + + Returns: + Global rotations for each camera pose, i.e. wRi, as a list. The number of entries in the list is + `num_images`. The list may contain `None` where the global rotation could not be computed (either + underconstrained system or ill-constrained system). + Metrics on global rotations. + """ + start_time = time.time() + wRis = self.run_rotation_averaging(num_images, i2Ri1_dict, i1Ti2_priors) + run_time = time.time() - start_time + + metrics = self.evaluate(wRis, wTi_gt) + metrics.add_metric(GtsfmMetric("total_duration_sec", run_time)) + + return wRis, metrics + def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup: - """Evaluate the global rotations computed by the rotation averaging implementation. + """Evaluates the global rotations computed by the rotation averaging implementation. Args: - wRi_computed: list of global rotations computed. - wTi_gt: ground truth global rotations to compare against. + wRi_computed: List of global rotations computed. + wTi_gt: Ground truth global rotations to compare against. + Raises: - ValueError: if the length of the computed and GT list differ. + ValueError: If the length of the computed and GT list differ. Returns: Metrics on global rotations. @@ -97,7 +129,8 @@ def create_computation_graph( global rotations wrapped using dask.delayed. """ - wRis = dask.delayed(self.run_rotation_averaging)(num_images, i2Ri1_graph, i1Ti2_priors) - metrics = dask.delayed(self.evaluate)(wRis, gt_wTi_list) + wRis, metrics = dask.delayed(self._run_rotation_averaging_base, nout=2)( + num_images, i2Ri1_dict=i2Ri1_graph, i1Ti2_priors=i1Ti2_priors, wTi_gt=gt_wTi_list + ) return wRis, metrics diff --git a/gtsfm/averaging/translation/averaging_1dsfm.py b/gtsfm/averaging/translation/averaging_1dsfm.py index a168b5e39..b2777bb9b 100644 --- a/gtsfm/averaging/translation/averaging_1dsfm.py +++ b/gtsfm/averaging/translation/averaging_1dsfm.py @@ -10,9 +10,11 @@ Authors: Jing Wu, Ayush Baid, Akshay Krishnan """ +import time from collections import defaultdict from enum import Enum from typing import DefaultDict, Dict, List, Optional, Set, Tuple + import gtsam import numpy as np from gtsam import ( @@ -461,6 +463,7 @@ def run_translation_averaging( w_i2Ui1_dict, valid_cameras = get_valid_measurements_in_world_frame(i2Ui1_dict, wRi_list) + start_time = time.time() if self._use_tracks_for_averaging: if tracks_2d is None: logger.info("No tracks provided for translation averaging. Falling back to camera unit translations.") @@ -473,10 +476,13 @@ def run_translation_averaging( else: w_i2Ui1_dict_tracks = {} + inlier_computation_start_time = time.time() w_i2Ui1_dict_inliers, w_i2Ui1_dict_tracks_inliers, inlier_cameras = self.compute_inliers( w_i2Ui1_dict, w_i2Ui1_dict_tracks ) + inlier_computation_time = time.time() - inlier_computation_start_time + averaging_start_time = time.time() wti_list = self.__run_averaging( num_images=num_images, w_i2Ui1_dict=w_i2Ui1_dict_inliers, @@ -486,6 +492,7 @@ def run_translation_averaging( absolute_pose_priors=absolute_pose_priors, scale_factor=scale_factor, ) + averaging_time = time.time() - averaging_start_time # Compute the metrics. ta_metrics = compute_metrics(set(w_i2Ui1_dict_inliers.keys()), i2Ui1_dict, wRi_list, wti_list, gt_wTi_list) @@ -497,6 +504,12 @@ def run_translation_averaging( wTi_list = [ Pose3(wRi, wti) if wRi is not None and wti is not None else None for wRi, wti in zip(wRi_list, wti_list) ] + total_time = time.time() - start_time + logger.info("Translation averaging took %.4f seconds.", total_time) + ta_metrics.add_metric(GtsfmMetric("total_duration_sec", total_time)) + ta_metrics.add_metric(GtsfmMetric("outier_rejection_duration_sec", inlier_computation_time)) + ta_metrics.add_metric(GtsfmMetric("optimization_duration_sec", averaging_time)) + return wTi_list, ta_metrics diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index 678ad538e..0e8b6ff15 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -3,6 +3,7 @@ Authors: Xiaolong Wu, John Lambert, Ayush Baid """ import logging +import time from collections import Counter from pathlib import Path from typing import Dict, List, Optional, Set, Tuple @@ -114,11 +115,11 @@ def __reprojection_factors(self, initial_data: GtsfmData, is_fisheye_calibration sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack - # retrieve the SfmMeasurement objects + # Retrieve the SfmMeasurement objects. for m_idx in range(track.numberMeasurements()): - # i represents the camera index, and uv is the 2d measurement + # `i` represents the camera index, and `uv` is the 2d measurement i, uv = track.measurement(m_idx) - # note use of shorthand symbols C and P + # Note use of shorthand symbols `X` and `P`. graph.push_back( sfm_factor_class( uv, @@ -218,7 +219,7 @@ def __construct_factor_graph( graph = NonlinearFactorGraph() - # Create a factor graph + # Create a factor graph. graph.push_back( self.__reprojection_factors(initial_data=initial_data, is_fisheye_calibration=is_fisheye_calibration) ) @@ -247,7 +248,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: """Initialize all the variables in the factor graph.""" initial_values = gtsam.Values() - # add each camera + # Add each camera. for loop_idx, i in enumerate(initial_data.get_valid_camera_indices()): camera = initial_data.get_camera(i) initial_values.insert(X(i), camera.pose()) @@ -255,7 +256,7 @@ def __initial_values(self, initial_data: GtsfmData) -> Values: # add only one value if calibrations are shared initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) - # add each SfmTrack + # Add each SfmTrack. for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) initial_values.insert(P(j), track.point3()) @@ -285,6 +286,48 @@ def __cameras_to_model( return sorted(list(cameras)) + def run_ba_stage_with_filtering( + self, + initial_data: GtsfmData, + absolute_pose_priors: List[Optional[PosePrior]], + relative_pose_priors: Dict[Tuple[int, int], PosePrior], + reproj_error_thresh: Optional[float], + verbose: bool = True, + ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: + """Runs bundle adjustment and optionally filters the resulting tracks by reprojection error.""" + cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) + graph = self.__construct_factor_graph( + cameras_to_model=cameras_to_model, + initial_data=initial_data, + absolute_pose_priors=absolute_pose_priors, + relative_pose_priors=relative_pose_priors, + ) + initial_values = self.__initial_values(initial_data=initial_data) + result_values = self.__optimize_factor_graph(graph, initial_values) + + # Print error. + final_error = graph.error(result_values) + if verbose: + logger.info("initial error: %.2f", graph.error(initial_values)) + logger.info("final error: %.2f", final_error) + + # Convert the `Values` results to a `GtsfmData` instance. + optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib) + + # Filter landmarks by reprojection error. + if reproj_error_thresh is not None: + if verbose: + logger.info("[Result] Number of tracks before filtering: %d", optimized_data.number_tracks()) + filtered_result, valid_mask = optimized_data.filter_landmarks(reproj_error_thresh) + if verbose: + logger.info("[Result] Number of tracks after filtering: %d", filtered_result.number_tracks()) + + else: + valid_mask = [True] * optimized_data.number_tracks() + filtered_result = optimized_data + + return optimized_data, filtered_result, valid_mask, final_error + def run_ba( self, initial_data: GtsfmData, @@ -292,7 +335,7 @@ def run_ba( relative_pose_priors: Dict[Tuple[int, int], PosePrior], verbose: bool = True, ) -> Tuple[GtsfmData, GtsfmData, List[bool]]: - """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization. + """Runs bundle adjustment by forming a factor graph and optimizing it using Levenberg–Marquardt optimization. Args: initial_data: Initialized cameras, tracks w/ their 3d landmark from triangulation. @@ -318,48 +361,77 @@ def run_ba( num_ba_steps = len(self._reproj_error_thresholds) for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): - cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) - graph = self.__construct_factor_graph( - cameras_to_model=cameras_to_model, - initial_data=initial_data, - absolute_pose_priors=absolute_pose_priors, - relative_pose_priors=relative_pose_priors, + # Use intermediate result as initial condition for next step. + (optimized_data, filtered_result, valid_mask, final_error) = self.run_ba_stage_with_filtering( + initial_data, + absolute_pose_priors, + relative_pose_priors, + reproj_error_thresh, + verbose, ) - initial_values = self.__initial_values(initial_data=initial_data) - result_values = self.__optimize_factor_graph(graph, initial_values) - - # Print error. - final_error = graph.error(result_values) - if verbose: - logger.info(f"initial error: {graph.error(initial_values):.2f}") - logger.info(f"final error: {final_error:.2f}") - - # Construct the results. - optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib) + # Print intermediate results. + if num_ba_steps > 1: + logger.info( + "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" + % (step + 1, num_ba_steps, final_error, filtered_result.number_tracks()) + ) - # Filter landmarks by reprojection error. - if reproj_error_thresh is not None: - if verbose: - logger.info("[Result] Number of tracks before filtering: %d", optimized_data.number_tracks()) - filtered_result, valid_mask = optimized_data.filter_landmarks(reproj_error_thresh) - if verbose: - logger.info("[Result] Number of tracks after filtering: %d", filtered_result.number_tracks()) + return optimized_data, filtered_result, valid_mask - else: - valid_mask = [True] * optimized_data.number_tracks() - filtered_result = optimized_data + def _run_ba_and_evaluate( + self, + initial_data: GtsfmData, + absolute_pose_priors: List[Optional[PosePrior]], + relative_pose_priors: Dict[Tuple[int, int], PosePrior], + cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]], + save_dir: Optional[str] = None, + verbose: bool = True, + ) -> Tuple[GtsfmData, GtsfmData, List[bool], GtsfmMetricsGroup]: + """Runs the equivalent of `run_ba()` and `evaluate()` in a single function, to enable time profiling.""" + logger.info( + "Input: %d tracks on %d cameras", initial_data.number_tracks(), len(initial_data.get_valid_camera_indices()) + ) + if initial_data.number_tracks() == 0 or len(initial_data.get_valid_camera_indices()) == 0: + # No cameras or tracks to optimize, so bundle adjustment is not possible. + logger.error( + "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." + ) + return initial_data, initial_data, [False] * initial_data.number_tracks() + step_times = [] + start_time = time.time() - # Set intermediate result as initial condition for next step. - initial_data = filtered_result + num_ba_steps = len(self._reproj_error_thresholds) + for step, reproj_error_thresh in enumerate(self._reproj_error_thresholds): + step_start_time = time.time() + (optimized_data, filtered_result, valid_mask, final_error) = self.run_ba_stage_with_filtering( + initial_data=initial_data, + absolute_pose_priors=absolute_pose_priors, + relative_pose_priors=relative_pose_priors, + reproj_error_thresh=reproj_error_thresh, + verbose=verbose, + ) + step_times.append(time.time() - step_start_time) # Print intermediate results. if num_ba_steps > 1: logger.info( - "[BA Step %d/%d] Error: %.2f, Number of tracks: %d" - % (step + 1, num_ba_steps, final_error, filtered_result.number_tracks()) + "[BA Stage @ thresh=%.2f px %d/%d] Error: %.2f, Number of tracks: %d" + % ( + reproj_error_thresh, + step + 1, + num_ba_steps, + final_error, + filtered_result.number_tracks(), + ) ) + total_time = time.time() - start_time - return optimized_data, filtered_result, valid_mask + metrics = self.evaluate(optimized_data, filtered_result, cameras_gt, save_dir) + for i, step_time in enumerate(step_times): + metrics.add_metric(GtsfmMetric(f"step_{i}_run_duration_sec", step_time)) + metrics.add_metric(GtsfmMetric("total_run_duration_sec", total_time)) + + return optimized_data, filtered_result, valid_mask, metrics def evaluate( self, @@ -368,7 +440,8 @@ def evaluate( cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]], save_dir: Optional[str] = None, ) -> GtsfmMetricsGroup: - """ + """Computes metrics on the bundle adjustment result, and packages them in a GtsfmMetricsGroup object. + Args: unfiltered_data: Optimized BA result, before filtering landmarks by reprojection error. filtered_data: Optimized BA result, after filtering landmarks and cameras. @@ -423,19 +496,23 @@ def create_computation_graph( """Create the computation graph for performing bundle adjustment. Args: - sfm_data_graph: an GtsfmData object wrapped up using dask.delayed - absolute_pose_priors: priors on the poses of the cameras (not delayed). - relative_pose_priors: priors on poses between cameras (not delayed). + sfm_data_graph: An GtsfmData object wrapped up using dask.delayed. + absolute_pose_priors: Priors on the poses of the cameras (not delayed). + relative_pose_priors: Priors on poses between cameras (not delayed). + cameras_gt: Ground truth camera calibration & pose for each image/camera. + save_dir: Directory where artifacts and plots should be saved to disk. Returns: GtsfmData aligned to GT (if provided), wrapped up using dask.delayed Metrics group for BA results, wrapped up using dask.delayed """ - optimized_sfm_data, filtered_sfm_data, _ = dask.delayed(self.run_ba, nout=3)( - sfm_data_graph, absolute_pose_priors, relative_pose_priors - ) - metrics_graph = dask.delayed(self.evaluate)( - optimized_sfm_data, filtered_sfm_data, cameras_gt, save_dir=save_dir + + _, filtered_sfm_data, _, metrics_graph = dask.delayed(self._run_ba_and_evaluate, nout=4)( + sfm_data_graph, + absolute_pose_priors, + relative_pose_priors, + cameras_gt, + save_dir=save_dir, ) return filtered_sfm_data, metrics_graph diff --git a/gtsfm/configs/correspondence/loftr.yaml b/gtsfm/configs/correspondence/loftr.yaml index 9dddb2cc7..1f363aa2f 100644 --- a/gtsfm/configs/correspondence/loftr.yaml +++ b/gtsfm/configs/correspondence/loftr.yaml @@ -2,5 +2,7 @@ CorrespondenceGenerator: _target_: gtsfm.frontend.correspondence_generator.image_correspondence_generator.ImageCorrespondenceGenerator matcher: - _target_: gtsfm.frontend.matcher.loftr.LOFTR + _target_: gtsfm.frontend.cacher.image_matcher_cacher.ImageMatcherCacher + matcher_obj: + _target_: gtsfm.frontend.matcher.loftr.LOFTR \ No newline at end of file diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index ecf6bc64b..2d5e02a50 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -154,6 +154,7 @@ def bundle_adjust( i2Ri1_initial: The relative rotation to be used as initial rotation between cameras. i2Ui1_initial: The relative unit direction, to be used to initialize initial translation between cameras. i2Ti1_prior: Prior on the relative pose for cameras (i1, i2). + Returns: Optimized relative rotation i2Ri1. Optimized unit translation i2Ui1. diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index 2777cae6e..5c08c2177 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -535,9 +535,9 @@ def pose_auc( if save_plot: if save_dir is None: raise ValueError("If `save_plot` is True, then `save_dir` must be provided.") - _ = plt.figure(dpi=200, facecolor='white') - plt.style.use('ggplot') - sns.set_style({'font.family': 'Times New Roman'}) + _ = plt.figure(dpi=200, facecolor="white") + plt.style.use("ggplot") + sns.set_style({"font.family": "Times New Roman"}) plt.scatter(e, r, 20, color="k", marker=".") plt.plot(e, r, color="r") plt.ylabel("Recall") From a6f83ce5b08fc4b11de602f229586626761074d2 Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Mon, 14 Aug 2023 06:51:49 +0000 Subject: [PATCH 16/30] formatting fixes --- gtsfm/common/gtsfm_data.py | 1 + gtsfm/utils/metrics.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/gtsfm/common/gtsfm_data.py b/gtsfm/common/gtsfm_data.py index ea0c09fd8..939455b6a 100644 --- a/gtsfm/common/gtsfm_data.py +++ b/gtsfm/common/gtsfm_data.py @@ -20,6 +20,7 @@ EQUALITY_TOLERANCE = 1e-5 + class GtsfmData: """Class containing cameras and tracks, essentially describing the complete 3D scene. diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py index a1eaa4f05..b02c6013b 100644 --- a/gtsfm/utils/metrics.py +++ b/gtsfm/utils/metrics.py @@ -16,7 +16,6 @@ from trimesh import Trimesh import gtsfm.utils.geometry_comparisons as comp_utils -import gtsfm.utils.io as io_utils import gtsfm.utils.logger as logger_utils import gtsfm.utils.verification as verification_utils from gtsfm.common.gtsfm_data import GtsfmData From 8a9eac29394aed981be84f3e424f5c262cc6720b Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Mon, 14 Aug 2023 06:53:32 +0000 Subject: [PATCH 17/30] update docstring --- gtsfm/two_view_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index 3ed3dae0a..abb865e34 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -592,7 +592,7 @@ def get_two_view_reports_summary( two_view_report_dict: Dict[Tuple[int, int], TwoViewEstimationReport], images: List[Image], ) -> List[Dict[str, Any]]: - """Converts the TwoViewEstimationReports for all image pairs to a Dict and saves it as JSON. + """Converts the TwoViewEstimationReports to a summary dict for each image pair. Args: two_view_report_dict: Front-end metrics for pairs of images. From ca840ff3f4b94ea2ddbdf0aa064d0eb19bcfbf2f Mon Sep 17 00:00:00 2001 From: John Lambert Date: Tue, 15 Aug 2023 00:12:38 -0400 Subject: [PATCH 18/30] Fix Dask worker memory limit warning (#697) * add timing metrics * Fix formatting * Remove formatting changes * fix formatting * fix formatting * fix formatting * fix formatting and docstrings * fix missing arg and improve fn naming * reformat * Rearrange and rename functions * improve docstrings * format code * update * only override memory limit if required --------- Co-authored-by: Akshay Krishnan Co-authored-by: senselessdev1 Co-authored-by: senselessdev1 --- gtsfm/runner/gtsfm_runner_base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index e9cf89b0a..cb69a43fa 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -59,7 +59,7 @@ def construct_argparser(self) -> argparse.ArgumentParser: help="Number of threads per each worker", ) parser.add_argument( - "--worker_memory_limit", type=str, default="auto", help="Memory limit per worker, e.g. `8GB`" + "--worker_memory_limit", type=str, default=None, help="Memory limit per worker, e.g. `8GB`" ) parser.add_argument( "--config_name", @@ -239,11 +239,13 @@ def run(self) -> GtsfmData: self.loader._input_worker = io_worker self.scene_optimizer._output_worker = io_worker else: - cluster = LocalCluster( - n_workers=self.parsed_args.num_workers, - threads_per_worker=self.parsed_args.threads_per_worker, - memory_limit=self.parsed_args.worker_memory_limit, - ) + local_cluster_kwargs = { + "n_workers": self.parsed_args.num_workers, + "threads_per_worker": self.parsed_args.threads_per_worker, + } + if self.parsed_args.worker_memory_limit is not None: + local_cluster_kwargs["memory_limit"] = self.parsed_args.worker_memory_limit + cluster = LocalCluster(**local_cluster_kwargs) client = Client(cluster) # Create process graph. From 546929209ebee28f1258fca365f6f4a7e8bbc864 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 23 Aug 2023 23:37:35 -0400 Subject: [PATCH 19/30] Use unified config in CI and configure num_matched via CLI (#698) * make num_matched CLI configurable * add unified config * use unified config * remove unnecessary class attribute * make sure colmap-formatted files are sorted * add config * reformat * Fix typo * add script * include loop over astrovision * num_matched=5, max_frame_lookahead=15 --------- Co-authored-by: senselessdev1 --- .github/scripts/execute_single_benchmark.sh | 9 +- .github/workflows/benchmark.yml | 30 ++-- benchmark_wildcat.sh | 139 ++++++++++++++++++ gtsfm/configs/correspondence/lightglue.yaml | 15 ++ gtsfm/configs/unified.yaml | 93 ++++++++++++ .../joint_netvlad_sequential_retriever.py | 3 +- gtsfm/runner/gtsfm_runner_base.py | 25 +++- gtsfm/utils/io.py | 16 +- tests/utils/test_io_utils.py | 22 +++ 9 files changed, 328 insertions(+), 24 deletions(-) create mode 100755 benchmark_wildcat.sh create mode 100644 gtsfm/configs/correspondence/lightglue.yaml create mode 100644 gtsfm/configs/unified.yaml diff --git a/.github/scripts/execute_single_benchmark.sh b/.github/scripts/execute_single_benchmark.sh index bf0c27c02..e1b3f4fb4 100755 --- a/.github/scripts/execute_single_benchmark.sh +++ b/.github/scripts/execute_single_benchmark.sh @@ -51,7 +51,8 @@ fi if [ "$LOADER_NAME" == "olsson-loader" ]; then python gtsfm/runner/run_scene_optimizer_olssonloader.py \ --dataset_root $DATASET_ROOT \ - --config_name ${CONFIG_NAME}.yaml \ + --config_name unified \ + --correspondence_generator_config_name ${CONFIG_NAME} \ --max_frame_lookahead $MAX_FRAME_LOOKAHEAD \ --max_resolution ${MAX_RESOLUTION} \ ${SHARE_INTRINSICS_ARG} \ @@ -61,7 +62,8 @@ elif [ "$LOADER_NAME" == "colmap-loader" ]; then python gtsfm/runner/run_scene_optimizer_colmaploader.py \ --images_dir ${IMAGES_DIR} \ --colmap_files_dirpath $COLMAP_FILES_DIRPATH \ - --config_name ${CONFIG_NAME}.yaml \ + --config_name unified \ + --correspondence_generator_config_name ${CONFIG_NAME} \ --max_frame_lookahead $MAX_FRAME_LOOKAHEAD \ --max_resolution ${MAX_RESOLUTION} \ ${SHARE_INTRINSICS_ARG} \ @@ -70,7 +72,8 @@ elif [ "$LOADER_NAME" == "colmap-loader" ]; then elif [ "$LOADER_NAME" == "astrovision" ]; then python gtsfm/runner/run_scene_optimizer_astrovision.py \ --data_dir $DATASET_ROOT \ - --config_name ${CONFIG_NAME}.yaml \ + --config_name unified \ + --correspondence_generator_config_name ${CONFIG_NAME} \ --max_frame_lookahead $MAX_FRAME_LOOKAHEAD \ --max_resolution ${MAX_RESOLUTION} \ ${SHARE_INTRINSICS_ARG} \ diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 80142cf0b..860ec6942 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -12,21 +12,21 @@ jobs: matrix: config_dataset_info: [ - # config dataset lookahead img-extension source loader max-res share-intrinsics - [sift_front_end, door-12, 12, JPG, test_data, olsson-loader, 1296, true], - [deep_front_end, door-12, 12, JPG, test_data, olsson-loader, 1296, true], - [sift_front_end, skydio-8, 8, jpg, gdrive , colmap-loader, 760, true], - [deep_front_end, skydio-8, 8, jpg, gdrive, colmap-loader, 760, true], - [sift_front_end, skydio-32, 32, jpg, gdrive, colmap-loader, 760, true], - [deep_front_end, skydio-32, 32, jpg, gdrive, colmap-loader, 760, true], - [sift_front_end, palace-fine-arts-281, 25, jpg, wget, olsson-loader, 320, true], - [deep_front_end, notre-dame-20, 20, jpg, gdrive, colmap-loader, 760, false], - [sift_front_end_astrovision, 2011205_rc3, 65, png, wget, astrovision, 1024, true], - [deep_front_end_astrovision, 2011205_rc3, 65, png, wget, astrovision, 1024, true], - [sift_front_end, gerrard-hall-100, 0, jpg, wget, colmap-loader, 760, true], - [deep_front_end, gerrard-hall-100, 0, jpg, wget, colmap-loader, 760, true], - [sift_front_end, south-building-128, 0, jpg, wget, colmap-loader, 760, true], - [deep_front_end, south-building-128, 0, jpg, wget, colmap-loader, 760, true], + # config dataset lookahead img-extension source loader max-res share-intrinsics + [sift, door-12, 15, JPG, test_data, olsson-loader, 1296, true], + [lightglue, door-12, 15, JPG, test_data, olsson-loader, 1296, true], + [sift, skydio-8, 15, jpg, gdrive , colmap-loader, 760, true], + [lightglue, skydio-8, 15, jpg, gdrive, colmap-loader, 760, true], + [sift, skydio-32, 15, jpg, gdrive, colmap-loader, 760, true], + [lightglue, skydio-32, 15, jpg, gdrive, colmap-loader, 760, true], + [sift, palace-fine-arts-281, 15, jpg, wget, olsson-loader, 320, true], + [lightglue, notre-dame-20, 15, jpg, gdrive, colmap-loader, 760, false], + [sift, 2011205_rc3, 15, png, wget, astrovision, 1024, true], + [lightglue, 2011205_rc3, 15, png, wget, astrovision, 1024, true], + [sift, gerrard-hall-100, 15, jpg, wget, colmap-loader, 760, true], + [lightglue, gerrard-hall-100, 15, jpg, wget, colmap-loader, 760, true], + [sift, south-building-128, 15, jpg, wget, colmap-loader, 760, true], + [lightglue, south-building-128, 15, jpg, wget, colmap-loader, 760, true], ] defaults: run: diff --git a/benchmark_wildcat.sh b/benchmark_wildcat.sh new file mode 100755 index 000000000..ef8c07333 --- /dev/null +++ b/benchmark_wildcat.sh @@ -0,0 +1,139 @@ +# Script to launch jobs over various datasets & front-ends. + +USER_ROOT=$1 + +now=$(date +"%Y%m%d_%H%M%S") + + +datasets=( + palace-fine-arts-281 + skydio-crane-mast-501 + 2011205_rc3 + south-building-128 + gerrard-hall-100 + ) + +max_frame_lookahead_sizes=( + 5 + 10 + 15 + ) + +num_matched_sizes=( + 5 + 10 + 15 + 20 + 25 + ) + +correspondence_generator_config_names=( + sift + lightglue + superglue + loftr + ) + + +for num_matched in ${num_matched_sizes[@]}; do + for max_frame_lookahead in ${max_frame_lookahead_sizes[@]}; do + for dataset in ${datasets[@]}; do + for correspondence_generator_config_name in ${correspondence_generator_config_names[@]}; do + + echo "Dataset: "${dataset} + echo "Num matched: "${num_matched} + echo "Max frame lookahead: "${max_frame_lookahead} + echo "Correspondence Generator: "${correspondence_generator_config_name} + + if [[ $correspondence_generator_config_name == *"sift"* ]] + then + num_workers=10 + elif [[ $correspondence_generator_config_name == *"lightglue"* ]] + then + num_workers=1 + elif [[ $correspondence_generator_config_name == *"loftr"* ]] + then + num_workers=1 + fi + + if [[ $dataset == *"palace-fine-arts-281"* ]] + then + loader=olsson + dataset_root=/usr/local/gtsfm-data/palace-fine-arts-281 + elif [[ $dataset == *"skydio-crane-mast-501"* ]] + then + loader=colmap + images_dir=/usr/local/gtsfm-data/skydio-crane-mast-501/skydio-crane-mast-501-images + colmap_files_dirpath=/usr/local/gtsfm-data/skydio-crane-mast-501/skydio-501-colmap-pseudo-gt + elif [[ $dataset == *"2011205_rc3"* ]] + then + loader=astrovision + data_dir=/usr/local/gtsfm-data/2011205_rc3 + elif [[ $dataset == *"south-building-128"* ]] + then + loader=colmap + images_dir=/usr/local/gtsfm-data/south-building-128/images + colmap_files_dirpath=/usr/local/gtsfm-data/south-building-128/colmap-2023-07-28-txt + elif [[ $dataset == *"gerrard-hall-100"* ]] + then + loader=colmap + images_dir=/usr/local/gtsfm-data/gerrard-hall-100/images + colmap_files_dirpath=/usr/local/gtsfm-data/gerrard-hall-100/colmap-3.7-sparse-txt-2023-07-27 + fi + + OUTPUT_ROOT=${USER_ROOT}/${now}/${now}__${dataset}__results__num_matched${num_matched}__maxframelookahead${max_frame_lookahead}__760p__unified_${correspondence_generator_config_name} + mkdir -p $OUTPUT_ROOT + + if [[ $loader == *"olsson"* ]] + then + python gtsfm/runner/run_scene_optimizer_olssonloader.py \ + --mvs_off \ + --config unified \ + --correspondence_generator_config_name $correspondence_generator_config_name \ + --share_intrinsics \ + --dataset_root $dataset_root \ + --num_workers $num_workers \ + --num_matched $num_matched \ + --max_frame_lookahead $max_frame_lookahead \ + --worker_memory_limit "32GB" \ + --output_root $OUTPUT_ROOT \ + --max_resolution 760 \ + 2>&1 | tee $OUTPUT_ROOT/out.log + elif [[ $loader == *"olsson"* ]] + then + python gtsfm/runner/run_scene_optimizer_colmaploader.py \ + --mvs_off \ + --config unified \ + --correspondence_generator_config_name $correspondence_generator_config_name \ + --share_intrinsics \ + --images_dir $images_dir \ + --colmap_files_dirpath $colmap_files_dirpath \ + --num_workers $num_workers \ + --num_matched $num_matched \ + --max_frame_lookahead $max_frame_lookahead \ + --worker_memory_limit "32GB" \ + --output_root $OUTPUT_ROOT \ + --max_resolution 760 \ + 2>&1 | tee $OUTPUT_ROOT/out.log + elif [[ $loader == *"astrovision"* ]] + then + python gtsfm/runner/run_scene_optimizer_astrovision.py \ + --mvs_off \ + --config unified \ + --correspondence_generator_config_name $correspondence_generator_config_name \ + --share_intrinsics \ + --data_dir $data_dir \ + --num_workers $num_workers \ + --num_matched $num_matched \ + --max_frame_lookahead $max_frame_lookahead \ + --worker_memory_limit "32GB" \ + --output_root $OUTPUT_ROOT \ + --max_resolution 760 \ + 2>&1 | tee $OUTPUT_ROOT/out.log + fi + done + done + done +done + + diff --git a/gtsfm/configs/correspondence/lightglue.yaml b/gtsfm/configs/correspondence/lightglue.yaml new file mode 100644 index 000000000..0350ef75c --- /dev/null +++ b/gtsfm/configs/correspondence/lightglue.yaml @@ -0,0 +1,15 @@ + +CorrespondenceGenerator: + _target_: gtsfm.frontend.correspondence_generator.det_desc_correspondence_generator.DetDescCorrespondenceGenerator + + detector_descriptor: + _target_: gtsfm.frontend.cacher.detector_descriptor_cacher.DetectorDescriptorCacher + detector_descriptor_obj: + _target_: gtsfm.frontend.detector_descriptor.superpoint.SuperPointDetectorDescriptor + max_keypoints: 5000 + + matcher: + _target_: gtsfm.frontend.cacher.matcher_cacher.MatcherCacher + matcher_obj: + _target_: gtsfm.frontend.matcher.lightglue_matcher.LightGlueMatcher + features: "superpoint" \ No newline at end of file diff --git a/gtsfm/configs/unified.yaml b/gtsfm/configs/unified.yaml new file mode 100644 index 000000000..ddea02636 --- /dev/null +++ b/gtsfm/configs/unified.yaml @@ -0,0 +1,93 @@ +# Default front-end configuration. + +SceneOptimizer: + _target_: gtsfm.scene_optimizer.SceneOptimizer + save_gtsfm_data: True + save_two_view_correspondences_viz: False + save_3d_viz: True + pose_angular_error_thresh: 5 # degrees + + retriever: + _target_: gtsfm.retriever.joint_netvlad_sequential_retriever.JointNetVLADSequentialRetriever + num_matched: 5 + min_score: 0.3 + max_frame_lookahead: 15 + + correspondence_generator: + _target_: gtsfm.frontend.correspondence_generator.det_desc_correspondence_generator.DetDescCorrespondenceGenerator + + detector_descriptor: + _target_: gtsfm.frontend.cacher.detector_descriptor_cacher.DetectorDescriptorCacher + detector_descriptor_obj: + _target_: gtsfm.frontend.detector_descriptor.superpoint.SuperPointDetectorDescriptor + max_keypoints: 5000 + + matcher: + _target_: gtsfm.frontend.cacher.matcher_cacher.MatcherCacher + matcher_obj: + _target_: gtsfm.frontend.matcher.lightglue_matcher.LightGlueMatcher + features: "superpoint" + + two_view_estimator: + _target_: gtsfm.two_view_estimator.TwoViewEstimator + bundle_adjust_2view: True + eval_threshold_px: 4 # in px + ba_reproj_error_thresholds: [0.5] + bundle_adjust_2view_maxiters: 100 + + verifier: + _target_: gtsfm.frontend.verifier.ransac.Ransac + use_intrinsics_in_verification: True + estimation_threshold_px: 4 # for H/E/F estimators + + triangulation_options: + _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions + mode: + _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode + value: NO_RANSAC + + inlier_support_processor: + _target_: gtsfm.two_view_estimator.InlierSupportProcessor + min_num_inliers_est_model: 15 + min_inlier_ratio_est_model: 0.1 + + multiview_optimizer: + _target_: gtsfm.multi_view_optimizer.MultiViewOptimizer + + # comment out to not run + view_graph_estimator: + _target_: gtsfm.view_graph_estimator.cycle_consistent_rotation_estimator.CycleConsistentRotationViewGraphEstimator + edge_error_aggregation_criterion: MEDIAN_EDGE_ERROR + + rot_avg_module: + _target_: gtsfm.averaging.rotation.shonan.ShonanRotationAveraging + + trans_avg_module: + _target_: gtsfm.averaging.translation.averaging_1dsfm.TranslationAveraging1DSFM + robust_measurement_noise: True + projection_sampling_method: SAMPLE_INPUT_MEASUREMENTS + + data_association_module: + _target_: gtsfm.data_association.data_assoc.DataAssociation + min_track_len: 2 + triangulation_options: + _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions + reproj_error_threshold: 10 + mode: + _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode + value: RANSAC_SAMPLE_UNIFORM + max_num_hypotheses: 100 + save_track_patches_viz: False + + bundle_adjustment_module: + _target_: gtsfm.bundle.bundle_adjustment.BundleAdjustmentOptimizer + reproj_error_thresholds: [10, 5, 3] # for (multistage) post-optimization filtering + robust_measurement_noise: True + shared_calib: False + cam_pose3_prior_noise_sigma: 0.1 + calibration_prior_noise_sigma: 1e-5 + measurement_noise_sigma: 1.0 + + # comment out to not run + dense_multiview_optimizer: + _target_: gtsfm.densify.mvs_patchmatchnet.MVSPatchmatchNet diff --git a/gtsfm/retriever/joint_netvlad_sequential_retriever.py b/gtsfm/retriever/joint_netvlad_sequential_retriever.py index 1e0d9a106..7e935f466 100644 --- a/gtsfm/retriever/joint_netvlad_sequential_retriever.py +++ b/gtsfm/retriever/joint_netvlad_sequential_retriever.py @@ -22,17 +22,16 @@ def __init__(self, num_matched: int, min_score: float, max_frame_lookahead: int) """ Args: num_matched: number of K potential matches to provide per query. These are the top "K" matches per query. + min_score: Minimum allowed similarity score to accept a match. max_frame_lookahead: maximum number of consecutive frames to consider for matching/co-visibility. """ super().__init__(matching_regime=ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL) - self._num_matched = num_matched self._similarity_retriever = NetVLADRetriever(num_matched=num_matched, min_score=min_score) self._seq_retriever = SequentialRetriever(max_frame_lookahead=max_frame_lookahead) def __repr__(self) -> str: return f""" JointNetVLADSequentialRetriever: - Num. frames matched: {self._num_matched} Similarity retriever: {self._similarity_retriever} Sequential retriever: {self._seq_retriever} """ diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index cb69a43fa..e410deb37 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -58,9 +58,7 @@ def construct_argparser(self) -> argparse.ArgumentParser: default=1, help="Number of threads per each worker", ) - parser.add_argument( - "--worker_memory_limit", type=str, default=None, help="Memory limit per worker, e.g. `8GB`" - ) + parser.add_argument("--worker_memory_limit", type=str, default=None, help="Memory limit per worker, e.g. `8GB`") parser.add_argument( "--config_name", type=str, @@ -98,6 +96,12 @@ def construct_argparser(self) -> argparse.ArgumentParser: default=None, help="maximum number of consecutive frames to consider for matching/co-visibility", ) + parser.add_argument( + "--num_matched", + type=int, + default=None, + help="Number of K potential matches to provide per query. These are the top `K` matches per query.", + ) parser.add_argument( "--share_intrinsics", action="store_true", help="Shares the intrinsics between all the cameras" ) @@ -191,6 +195,21 @@ def construct_scene_optimizer(self) -> SceneOptimizer: scene_optimizer.retriever._max_frame_lookahead = self.parsed_args.max_frame_lookahead elif scene_optimizer.retriever._matching_regime == ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL: scene_optimizer.retriever._seq_retriever._max_frame_lookahead = self.parsed_args.max_frame_lookahead + else: + raise ValueError( + "`max_frame_lookahead` arg is incompatible with retriever matching regime " + f"{scene_optimizer.retriever._matching_regime}" + ) + if self.parsed_args.num_matched is not None: + if scene_optimizer.retriever._matching_regime == ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL: + scene_optimizer.retriever._similarity_retriever._num_matched = self.parsed_args.num_matched + elif scene_optimizer.retriever._matching_regime == ImageMatchingRegime.RETRIEVAL: + scene_optimizer.retriever._num_matched = self.parsed_args.num_matched + else: + raise ValueError( + "`num_matched` arg is incompatible with retriever matching regime " + f"{scene_optimizer.retriever._matching_regime}" + ) if self.parsed_args.mvs_off: scene_optimizer.run_dense_optimizer = False diff --git a/gtsfm/utils/io.py b/gtsfm/utils/io.py index 4013dada5..5a3454d87 100644 --- a/gtsfm/utils/io.py +++ b/gtsfm/utils/io.py @@ -349,7 +349,20 @@ def read_images_txt(fpath: str) -> Tuple[Optional[List[Pose3]], Optional[List[st wTi_list.append(wTi) img_fnames.append(img_fname) - return wTi_list, img_fnames + # TODO(johnwlambert): Re-order tracks for COLMAP-formatted .bin files. + wTi_list_sorted, img_fnames_sorted = sort_image_filenames_lexigraphically(wTi_list, img_fnames) + + return wTi_list_sorted, img_fnames_sorted + + +def sort_image_filenames_lexigraphically(wTi_list: List[Pose3], img_fnames: List[str]) -> Tuple[List[Pose3], List[str]]: + """Sort a list of camera poses according to provided image file names.""" + sorted_idxs = sorted(range(len(img_fnames)), key=lambda i: img_fnames[i]) + + wTi_list_sorted = [wTi_list[i] for i in sorted_idxs] + img_fnames_sorted = [img_fnames[i] for i in sorted_idxs] + + return wTi_list_sorted, img_fnames_sorted def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: @@ -503,6 +516,7 @@ def read_scene_data_from_colmap_format( if any(x is None for x in [wTi_list, img_fnames, calibrations, point_cloud, rgb]): raise RuntimeError("One or more of the requested model data products was not found.") + print(f"Loaded {len(wTi_list)} cameras with {point_cloud.shape[0]} points.") return wTi_list, img_fnames, calibrations, point_cloud, rgb diff --git a/tests/utils/test_io_utils.py b/tests/utils/test_io_utils.py index 173de94bc..896aedee6 100644 --- a/tests/utils/test_io_utils.py +++ b/tests/utils/test_io_utils.py @@ -232,6 +232,28 @@ def test_json_roundtrip(self) -> None: self.assertEqual(data_from_json["data"][0], None) np.testing.assert_allclose(data["data"][1:], data_from_json["data"][1:]) + def test_sort_image_filenames_lexigraphically(self) -> None: + """Tests that 5 image-camera pose pairs are sorted jointly according to file name.""" + wTi_list = [ + Pose3(Rot3(), np.array([0,0,34])), + Pose3(Rot3(), np.array([0,0,35])), + Pose3(Rot3(), np.array([0,0,36])), + Pose3(Rot3(), np.array([0,0,28])), + Pose3(Rot3(), np.array([0,0,37])) + ] + img_fnames = ['P1180334.JPG', 'P1180335.JPG', 'P1180336.JPG', 'P1180328.JPG', 'P1180337.JPG'] + + wTi_list_sorted, img_fnames_sorted = io_utils.sort_image_filenames_lexigraphically(wTi_list, img_fnames) + + expected_img_fnames_sorted = ['P1180328.JPG', 'P1180334.JPG', 'P1180335.JPG', 'P1180336.JPG', 'P1180337.JPG'] + self.assertEqual(img_fnames_sorted, expected_img_fnames_sorted) + + self.assertEqual(wTi_list_sorted[0].translation()[2], 28) + self.assertEqual(wTi_list_sorted[1].translation()[2], 34) + self.assertEqual(wTi_list_sorted[2].translation()[2], 35) + self.assertEqual(wTi_list_sorted[3].translation()[2], 36) + self.assertEqual(wTi_list_sorted[4].translation()[2], 37) + if __name__ == "__main__": unittest.main() From 812fb4b1c780333cc3e261970ecb25fb78c80fe2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 25 Aug 2023 04:47:20 -0400 Subject: [PATCH 20/30] Bump actions/checkout from 3.5.3 to 3.6.0 (#699) Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.3 to 3.6.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3.5.3...v3.6.0) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark-self-hosted.yml | 2 +- .github/workflows/benchmark.yml | 2 +- .github/workflows/build-wheels.yml | 2 +- .github/workflows/test-python.yml | 2 +- .github/workflows/test-reproducibility.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark-self-hosted.yml b/.github/workflows/benchmark-self-hosted.yml index a40ffdab2..a278bdb0b 100644 --- a/.github/workflows/benchmark-self-hosted.yml +++ b/.github/workflows/benchmark-self-hosted.yml @@ -29,7 +29,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.5.3 + - uses: actions/checkout@v3.6.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 860ec6942..a66630338 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -36,7 +36,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.5.3 + - uses: actions/checkout@v3.6.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index da303c1cb..9221abeae 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -18,7 +18,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v3.5.3 + - uses: actions/checkout@v3.6.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 995c0f4df..ca1f92a21 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.5.3 + - uses: actions/checkout@v3.6.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-reproducibility.yml b/.github/workflows/test-reproducibility.yml index 087483cfb..977fff60f 100644 --- a/.github/workflows/test-reproducibility.yml +++ b/.github/workflows/test-reproducibility.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.5.3 + - uses: actions/checkout@v3.6.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" From dbc3b784d80dd8278075cab7ed4b8102bbc0ffc4 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 30 Aug 2023 00:30:53 -0400 Subject: [PATCH 21/30] Measure view-graph estimation runtime (#700) * make num_matched CLI configurable * add unified config * use unified config * remove unnecessary class attribute * make sure colmap-formatted files are sorted * add config * reformat * Fix typo * add script * include loop over astrovision * num_matched=5, max_frame_lookahead=15 * measure View Graph Estimation runtime --------- Co-authored-by: senselessdev1 Co-authored-by: senselessdev1 --- .../cycle_consistent_rotation_estimator.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gtsfm/view_graph_estimator/cycle_consistent_rotation_estimator.py b/gtsfm/view_graph_estimator/cycle_consistent_rotation_estimator.py index 26e9d83b1..35ec68367 100644 --- a/gtsfm/view_graph_estimator/cycle_consistent_rotation_estimator.py +++ b/gtsfm/view_graph_estimator/cycle_consistent_rotation_estimator.py @@ -3,6 +3,7 @@ Authors: John Lambert, Ayush Baid, Akshay Krishnan """ import os +import time from collections import defaultdict from enum import Enum from pathlib import Path @@ -99,6 +100,7 @@ def run( Edges of the view-graph, which are the subset of the image pairs in the input args. """ # pylint: disable=unused-argument + start_time = time.time() logger.info("Input number of edges: %d" % len(i2Ri1_dict)) input_edges: List[Tuple[int, int]] = i2Ri1_dict.keys() @@ -138,7 +140,14 @@ def run( valid_edges, cycle_errors, max_gt_error_in_cycle, per_edge_aggregate_error, two_view_reports, output_dir ) - logger.info("Found %d consistent rel. rotations from %d original edges.", len(valid_edges), len(input_edges)) + end_time = time.time() + duration_sec = end_time - start_time + logger.info( + "Found %d consistent rel. rotations from %d original edges in %.2f sec.", + len(valid_edges), + len(input_edges), + duration_sec, + ) return valid_edges From 81a2671d2b2ada7a38cad06fcb6829dc9d9d86f4 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 30 Aug 2023 00:31:21 -0400 Subject: [PATCH 22/30] Prevent heartbeat timeout (#701) * add code to prevent heartbeat timeout * move to base runner --------- Co-authored-by: senselessdev1 Co-authored-by: senselessdev1 --- gtsfm/runner/gtsfm_runner_base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index e410deb37..27e2e8033 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -10,6 +10,7 @@ import dask import hydra import numpy as np +from dask import config as dask_config from dask.distributed import Client, LocalCluster, SSHCluster, performance_report from gtsam import Rot3, Unit3 from hydra.utils import instantiate @@ -24,6 +25,8 @@ from gtsfm.two_view_estimator import TWO_VIEW_OUTPUT, TwoViewEstimationReport, run_two_view_estimator_as_futures from gtsfm.ui.process_graph_generator import ProcessGraphGenerator +dask_config.set({"distributed.scheduler.worker-ttl": None}) + logger = logger_utils.get_logger() DEFAULT_OUTPUT_ROOT = Path(__file__).resolve().parent.parent.parent From b324677229b2a58434e57bde1919084e72beb057 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 31 Aug 2023 22:59:28 -0400 Subject: [PATCH 23/30] add script to collect results (#702) --- .../benchmark_wildcat.sh | 0 scripts/collect_results.py | 167 ++++++++++++++++++ 2 files changed, 167 insertions(+) rename benchmark_wildcat.sh => scripts/benchmark_wildcat.sh (100%) create mode 100644 scripts/collect_results.py diff --git a/benchmark_wildcat.sh b/scripts/benchmark_wildcat.sh similarity index 100% rename from benchmark_wildcat.sh rename to scripts/benchmark_wildcat.sh diff --git a/scripts/collect_results.py b/scripts/collect_results.py new file mode 100644 index 000000000..12da1be4f --- /dev/null +++ b/scripts/collect_results.py @@ -0,0 +1,167 @@ +"""Aggregate into a Markdown or LaTeX table results of experiments comparing various front-ends. + +We use the `tabulate` package to print the table to STDOUT. +""" + +import argparse +import datetime +from collections import defaultdict +from pathlib import Path +from typing import DefaultDict, Sequence + +from tabulate import tabulate + +import gtsfm.utils.io as io_utils + + +isp_fname = "verifier_summary_POST_INLIER_SUPPORT_PROCESSOR_2VIEW_REPORT.json" +isp_metrics = ["rot3_angular_errors_deg", "trans_angular_errors_deg", "pose_errors_deg"] + +retriever_fname = "retriever_metrics.json" +retriever_metrics = ["num_input_images", "num_retrieved_image_pairs"] + +vg_fname = "view_graph_estimation_metrics.json" +vg_metrics = [ + "num_input_measurements", + "num_inlier_measurements", + "num_outlier_measurements", + "inlier_R_angular_errors_deg", + "inlier_U_angular_errors_deg", +] + +ra_fname = "rotation_averaging_metrics.json" +ra_metrics = ["rotation_angle_error_deg", "total_duration_sec"] + +ta_fname = "translation_averaging_metrics.json" +ta_metrics = [ + "relative_translation_angle_error_deg", + "translation_angle_error_deg", + "total_duration_sec", + "outier_rejection_duration_sec", + "optimization_duration_sec", +] + +ba_result_fname = "bundle_adjustment_metrics.json" +ba_result_metrics = [ + "number_cameras", + "number_tracks_filtered", + "3d_track_lengths_filtered", + "reprojection_errors_filtered_px", + "rotation_angle_error_deg", + "relative_translation_angle_error_deg", + "translation_angle_error_deg", + "pose_auc_@1_deg", + "pose_auc_@2.5_deg", + "pose_auc_@5_deg", + "pose_auc_@10_deg", + "pose_auc_@20_deg", + "step_0_run_duration_sec", + "step_1_run_duration_sec", + "step_2_run_duration_sec", + "total_run_duration_sec", +] + +# Metrics that **do not** have a median + mean value associated. +SCALAR_METRIC_NAMES = [ + "number_cameras", + "number_tracks_filtered", + "num_input_images", + "num_retrieved_image_pairs", + "pose_auc_@1_deg", + "pose_auc_@2.5_deg", + "pose_auc_@5_deg", + "pose_auc_@10_deg", + "pose_auc_@20_deg", + "step_0_run_duration_sec", + "step_1_run_duration_sec", + "step_2_run_duration_sec", + "total_run_duration_sec", + "total_duration_sec", + "total_duration_sec", + "outier_rejection_duration_sec", + "optimization_duration_sec", + "num_input_measurements", + "num_inlier_measurements", + "num_outlier_measurements", +] + + +def main(user_root: Path, output_fpath: str) -> None: + """ """ + # Store each column as mappings of (key, value) pairs, where (metric_name, experiment_value). + table = defaultdict(list) + headers = ["method_name"] + + experiment_roots = sorted(list(user_root.glob("*-*"))) + + method_idx = 0 + for experiment_root in experiment_roots: + + dirpath = Path(experiment_root) / "result_metrics" + frontend_name = Path(experiment_root).name + table["method_name"].append(frontend_name) + + for json_fname, metric_names, nickname in zip( + [retriever_fname, isp_fname, vg_fname, ra_fname, ta_fname, ba_result_fname], + [retriever_metrics, isp_metrics, vg_metrics, ra_metrics, ta_metrics, ba_result_metrics], + ["retriever", "isp", "vg", "ra", "ta", "ba"], + ): + section_name = Path(json_fname).stem + print(f"{dirpath}/{json_fname}") + json_data = io_utils.read_json_file(f"{dirpath}/{json_fname}")[section_name] + for metric_name in metric_names: + full_metric_name = f"{nickname}_" + " ".join(metric_name.split("_")) + if method_idx == 0: + headers.append(full_metric_name) + + if "pose_auc_" in metric_name and metric_name in SCALAR_METRIC_NAMES: + table[full_metric_name].append(json_data[metric_name] * 100) + elif metric_name in SCALAR_METRIC_NAMES: + print(f"{metric_name}: {json_data[metric_name]}") + table[full_metric_name].append(json_data[metric_name]) + else: + med = f"{json_data[metric_name]['summary']['median']:.2f}" + mean = f"{json_data[metric_name]['summary']['mean']:.2f}" + print(f"Med / Median {metric_name}: {med} / {mean}") + table[full_metric_name].append(f"{med} / {mean}") + method_idx += 1 + + # We treat the defaultdict as a table (dict of iterables). + stdout_lines = tabulate(table, headers, tablefmt="fancy_grid") + print(stdout_lines) + save_table_to_tsv(table, headers, output_fpath) + + +def save_table_to_tsv(table: DefaultDict, headers: Sequence[str], output_fpath: str) -> None: + """Save a table to tsv, with given column headers.""" + content = tabulate(table, headers, tablefmt="tsv") + text_file = open(output_fpath, "w") + text_file.write(content) + text_file.close() + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument( + "--output_fpath", + type=str, + default=f"./{datetime.datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')}_aggregated_results_table.tsv", + help="Output file path where spreadsheet (tsv) containing aggregated results will be written.", + ) + parser.add_argument( + "--user_root", + type=str, + required=True, + help="Root directory where experiment results are stored, with a subdirectory for each experiment.", + ) + args = parser.parse_args() + + user_root = Path(args.user_root) + if not user_root.exists(): + raise FileNotFoundError(f"No directory was found at {user_root}.") + + if Path(args.output_fpath).suffix != ".tsv": + raise ValueError("Output file path must end in `.tsv`") + + main(user_root=user_root, output_fpath=args.output_fpath) From a364160246d6931e2ee1707af57077143f0d390a Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sat, 2 Sep 2023 02:43:04 +0000 Subject: [PATCH 24/30] review minor comments --- gtsfm/runner/gtsfm_runner_base.py | 8 ++++---- gtsfm/scene_optimizer.py | 2 +- gtsfm/two_view_estimator.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index aaa417372..5461f6646 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -291,18 +291,18 @@ def run(self) -> GtsfmData: two_view_estimation_duration_sec = time.time() - two_view_estimation_start_time i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict = unzip_two_view_results(two_view_results_dict) - two_view_metrics = two_view_estimator.aggregate_frontend_metrics( + two_view_agg_metrics = two_view_estimator.aggregate_frontend_metrics( two_view_reports_dict=two_view_reports_dict, angular_err_threshold_deg=self.scene_optimizer._pose_angular_error_thresh, metric_group_name="verifier_summary_{}".format(two_view_estimator.POST_ISP_REPORT_TAG), ) - two_view_metrics.add_metric( + two_view_agg_metrics.add_metric( GtsfmMetric("total_correspondence_generation_duration_sec", correspondence_generation_duration_sec) ) - two_view_metrics.add_metric( + two_view_agg_metrics.add_metric( GtsfmMetric("total_two_view_estimation_duration_sec", two_view_estimation_duration_sec) ) - all_metrics_groups = [retriever_metrics, two_view_metrics] + all_metrics_groups = [retriever_metrics, two_view_agg_metrics] delayed_sfm_result, delayed_io, delayed_mvo_metrics_groups = self.scene_optimizer.create_computation_graph( keypoints_list=keypoints_list, diff --git a/gtsfm/scene_optimizer.py b/gtsfm/scene_optimizer.py index 0f378762b..193dd0597 100644 --- a/gtsfm/scene_optimizer.py +++ b/gtsfm/scene_optimizer.py @@ -313,7 +313,7 @@ def save_visualizations( plot_ba_input_path: Path, plot_results_path: Path, ) -> List[Delayed]: - """Save SfmData before and after bundle adjustment and camera poses for visualization.i + """Save SfmData before and after bundle adjustment and camera poses for visualization. Accepts delayed GtsfmData before and after bundle adjustment, along with GT poses, saves them and returns a delayed object. diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index abb865e34..4357733cd 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -602,7 +602,7 @@ def get_two_view_reports_summary( List of dictionaries, where each dictionary contains the metrics for an image pair. """ - def round_fn(x): + def round_fn(x: Optional[float]) -> Optional[float]: return round(x, 2) if x else None metrics_list = [] From 4e2fb73f8e055b75516ae1060f9425efc6a4fa4b Mon Sep 17 00:00:00 2001 From: John Lambert Date: Sat, 2 Sep 2023 00:02:19 -0400 Subject: [PATCH 25/30] Update README.md (#703) * Update README.md * Remove image_extensions flag from commands in readme --- README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 171785c32..77e1b578a 100644 --- a/README.md +++ b/README.md @@ -69,13 +69,13 @@ To run SfM with a dataset with only an image directory and EXIF, with image file and run ```python -python gtsfm/runner/run_scene_optimizer_olssonloader.py --config_name {CONFIG_NAME} --dataset_root {DATASET_ROOT} --image_extension jpg --num_workers {NUM_WORKERS} +python gtsfm/runner/run_scene_optimizer_olssonloader.py --config_name {CONFIG_NAME} --dataset_root {DATASET_ROOT} --num_workers {NUM_WORKERS} ``` For example, if you had 4 cores available and wanted to use the Deep Front-End (recommended) on the "door" dataset, you should run: ```bash -python gtsfm/runner/run_scene_optimizer_olssonloader.py --dataset_root tests/data/set1_lund_door --image_extension JPG --config_name deep_front_end.yaml --num_workers 4 +python gtsfm/runner/run_scene_optimizer_olssonloader.py --dataset_root tests/data/set1_lund_door --config_name deep_front_end.yaml --num_workers 4 ``` (or however many workers you desire). @@ -87,7 +87,7 @@ Currently we require EXIF data embedded into your images (or you can provide gro If you would like to compare GTSfM output with COLMAP output, please run: ```python -python gtsfm/runner/run_scene_optimizer_colmaploader.py --config_name {CONFIG_NAME} --images_dir {IMAGES_DIR} --colmap_files_dirpath {COLMAP_FILES_DIRPATH} --image_extension jpg --num_workers {NUM_WORKERS} --max_frame_lookahead {MAX_FRAME_LOOKAHEAD} +python gtsfm/runner/run_scene_optimizer_colmaploader.py --config_name {CONFIG_NAME} --images_dir {IMAGES_DIR} --colmap_files_dirpath {COLMAP_FILES_DIRPATH} --num_workers {NUM_WORKERS} --max_frame_lookahead {MAX_FRAME_LOOKAHEAD} ``` where `COLMAP_FILES_DIRPATH` is a directory where .txt files such as `cameras.txt`, `images.txt`, etc have been saved. @@ -145,7 +145,3 @@ Open-source Python implementation: ``` Note: authors are listed in alphabetical order (by last name). - -## Compiling Additional Verifiers - -On Linux, we have made `pycolmap`'s LORANSAC available in [pypi](https://pypi.org/project/pycolmap/). However, on Mac, `pycolmap` must be built from scratch. See the instructions [here](https://github.com/borglab/gtsfm/blob/master/gtsfm/frontend/verifier/loransac.py#L10). From 9c523fa72653d2873fba8d3714142a22bfbd0a1c Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Sun, 3 Sep 2023 14:53:42 +0000 Subject: [PATCH 26/30] logging and docstring --- gtsfm/bundle/bundle_adjustment.py | 10 ++++++++++ gtsfm/runner/gtsfm_runner_base.py | 3 --- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index aae4726a7..f4ec4ed6d 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -298,6 +298,16 @@ def run_ba_stage_with_filtering( verbose: bool = True, ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: """Runs bundle adjustment and optionally filters the resulting tracks by reprojection error.""" + logger.info( + "Input: %d tracks on %d cameras", initial_data.number_tracks(), len(initial_data.get_valid_camera_indices()) + ) + if initial_data.number_tracks() == 0 or len(initial_data.get_valid_camera_indices()) == 0: + # No cameras or tracks to optimize, so bundle adjustment is not possible, return invalid result. + logger.error( + "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras." + ) + return initial_data, initial_data, [False] * initial_data.number_tracks(), 0.0 + cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors) graph = self.__construct_factor_graph( cameras_to_model=cameras_to_model, diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index 5461f6646..3900e7aa5 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -369,9 +369,6 @@ def save_metrics_reports(metrics_group_list: List[GtsfmMetricsGroup], metrics_pa Args: metrics_graph: List of GtsfmMetricsGroup from different modules wrapped as Delayed. metrics_path: Path to directory where computed metrics will be saved. - - Returns: - List of delayed objects after saving metrics. """ # Save metrics to JSON From 814381d486878cafc1693554ef50d0a72badfef2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Sep 2023 15:16:29 +0000 Subject: [PATCH 27/30] Bump actions/checkout from 3.6.0 to 4.0.0 Bumps [actions/checkout](https://github.com/actions/checkout) from 3.6.0 to 4.0.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3.6.0...v4.0.0) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/benchmark-self-hosted.yml | 2 +- .github/workflows/benchmark.yml | 2 +- .github/workflows/build-wheels.yml | 2 +- .github/workflows/test-python.yml | 2 +- .github/workflows/test-reproducibility.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark-self-hosted.yml b/.github/workflows/benchmark-self-hosted.yml index a278bdb0b..125a78398 100644 --- a/.github/workflows/benchmark-self-hosted.yml +++ b/.github/workflows/benchmark-self-hosted.yml @@ -29,7 +29,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.6.0 + - uses: actions/checkout@v4.0.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a66630338..660e140de 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -36,7 +36,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.6.0 + - uses: actions/checkout@v4.0.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index 9221abeae..a2c8becca 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -18,7 +18,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v3.6.0 + - uses: actions/checkout@v4.0.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index ca1f92a21..9d62b9b1b 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.6.0 + - uses: actions/checkout@v4.0.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-reproducibility.yml b/.github/workflows/test-reproducibility.yml index 977fff60f..583102fc5 100644 --- a/.github/workflows/test-reproducibility.yml +++ b/.github/workflows/test-reproducibility.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v3.6.0 + - uses: actions/checkout@v4.0.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" From a6694a3125fcd35ddf40e295433427fcc9a6ea42 Mon Sep 17 00:00:00 2001 From: akshay-krishnan Date: Mon, 4 Sep 2023 19:23:00 +0000 Subject: [PATCH 28/30] update docstring --- gtsfm/bundle/bundle_adjustment.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/gtsfm/bundle/bundle_adjustment.py b/gtsfm/bundle/bundle_adjustment.py index f4ec4ed6d..0126143ef 100644 --- a/gtsfm/bundle/bundle_adjustment.py +++ b/gtsfm/bundle/bundle_adjustment.py @@ -297,7 +297,22 @@ def run_ba_stage_with_filtering( reproj_error_thresh: Optional[float], verbose: bool = True, ) -> Tuple[GtsfmData, GtsfmData, List[bool], float]: - """Runs bundle adjustment and optionally filters the resulting tracks by reprojection error.""" + """Runs bundle adjustment and optionally filters the resulting tracks by reprojection error. + + Args: + initial_data: Initialized cameras, tracks w/ their 3d landmark from triangulation. + absolute_pose_priors: Priors to be used on cameras. + relative_pose_priors: Priors on the pose between two cameras. + reproj_error_thresh: Maximum 3D track reprojection error, for filtering tracks after BA. + verbose: Boolean flag to print out additional info for debugging. + + Results: + Optimized camera poses, 3D point w/ tracks, and error metrics, aligned to GT (if provided). + Optimized camera poses after filtering landmarks (and cameras with no remaining landmarks). + Valid mask as a list of booleans, indicating for each input track whether it was below the re-projection + threshold. + Final error value of the optimization problem. + """ logger.info( "Input: %d tracks on %d cameras", initial_data.number_tracks(), len(initial_data.get_valid_camera_indices()) ) From d9f0052094b1bee0c352efa7c85edf0909998f1a Mon Sep 17 00:00:00 2001 From: Akshay Krishnan Date: Mon, 4 Sep 2023 18:34:11 -0400 Subject: [PATCH 29/30] remove the image_extensions flag (almost) completely (#695) * Update README.md * update all calls to use util * remove typo * remove unused imports * remove traces in one_d_sfm_loader, benchmarking --- .github/scripts/execute_single_benchmark.sh | 9 ++++---- .../execute_single_benchmark_self_hosted.sh | 9 ++++---- .github/workflows/benchmark-self-hosted.yml | 2 -- .github/workflows/benchmark.yml | 2 -- gtsfm/loader/loader_base.py | 3 +-- gtsfm/loader/olsson_loader.py | 11 +--------- gtsfm/loader/one_d_sfm_loader.py | 7 ++----- gtsfm/runner/run_scene_optimizer_1dsfm.py | 2 -- gtsfm/utils/io.py | 21 +++++++++++++++++++ gtsfm/visualization/view_scene_olsson.py | 1 - 10 files changed, 33 insertions(+), 34 deletions(-) diff --git a/.github/scripts/execute_single_benchmark.sh b/.github/scripts/execute_single_benchmark.sh index e1b3f4fb4..e11c2f31a 100755 --- a/.github/scripts/execute_single_benchmark.sh +++ b/.github/scripts/execute_single_benchmark.sh @@ -3,10 +3,9 @@ DATASET_NAME=$1 CONFIG_NAME=$2 MAX_FRAME_LOOKAHEAD=$3 -IMAGE_EXTENSION=$4 -LOADER_NAME=$5 -MAX_RESOLUTION=$6 -SHARE_INTRINSICS=$7 +LOADER_NAME=$4 +MAX_RESOLUTION=$5 +SHARE_INTRINSICS=$6 # Extract the data, configure arguments for runner. if [ "$DATASET_NAME" == "door-12" ]; then @@ -37,7 +36,7 @@ elif [ "$DATASET_NAME" == "south-building-128" ]; then fi echo "Config: ${CONFIG_NAME}, Loader: ${LOADER_NAME}" -echo "Max. Frame Lookahead: ${MAX_FRAME_LOOKAHEAD}, Image Extension: ${IMAGE_EXTENSION}, Max. Resolution: ${MAX_RESOLUTION}" +echo "Max. Frame Lookahead: ${MAX_FRAME_LOOKAHEAD}, Max. Resolution: ${MAX_RESOLUTION}" echo "Share intrinsics for all images? ${SHARE_INTRINSICS}" # Setup the command line arg if intrinsics are to be shared diff --git a/.github/scripts/execute_single_benchmark_self_hosted.sh b/.github/scripts/execute_single_benchmark_self_hosted.sh index 2a296bf29..964792868 100755 --- a/.github/scripts/execute_single_benchmark_self_hosted.sh +++ b/.github/scripts/execute_single_benchmark_self_hosted.sh @@ -8,10 +8,9 @@ DATASET_PREFIX=/usr/local/gtsfm-data DATASET_NAME=$1 CONFIG_NAME=$2 MAX_FRAME_LOOKAHEAD=$3 -IMAGE_EXTENSION=$4 -LOADER_NAME=$5 -MAX_RESOLUTION=$6 -SHARE_INTRINSICS=$7 +LOADER_NAME=$4 +MAX_RESOLUTION=$5 +SHARE_INTRINSICS=$6 # Extract the data, configure arguments for runner. if [ "$DATASET_NAME" == "skydio-501" ]; then @@ -20,7 +19,7 @@ if [ "$DATASET_NAME" == "skydio-501" ]; then fi echo "Config: ${CONFIG_NAME}, Loader: ${LOADER_NAME}" -echo "Max. Frame Lookahead: ${MAX_FRAME_LOOKAHEAD}, Image Extension: ${IMAGE_EXTENSION}, Max. Resolution: ${MAX_RESOLUTION}" +echo "Max. Frame Lookahead: ${MAX_FRAME_LOOKAHEAD}, Max. Resolution: ${MAX_RESOLUTION}" echo "Share intrinsics for all images? ${SHARE_INTRINSICS}" # Setup the command line arg if intrinsics are to be shared diff --git a/.github/workflows/benchmark-self-hosted.yml b/.github/workflows/benchmark-self-hosted.yml index a278bdb0b..936eed28e 100644 --- a/.github/workflows/benchmark-self-hosted.yml +++ b/.github/workflows/benchmark-self-hosted.yml @@ -67,7 +67,6 @@ jobs: DATASET_NAME=${{ matrix.config_dataset_info[1] }} CONFIG_NAME=${{ matrix.config_dataset_info[0] }} MAX_FRAME_LOOKAHEAD=${{ matrix.config_dataset_info[2] }} - IMAGE_EXTENSION=${{ matrix.config_dataset_info[3] }} LOADER_NAME=${{ matrix.config_dataset_info[5] }} MAX_RESOLUTION=${{ matrix.config_dataset_info[6] }} SHARE_INTRINSICS=${{ matrix.config_dataset_info[7] }} @@ -78,7 +77,6 @@ jobs: $DATASET_NAME \ $CONFIG_NAME \ $MAX_FRAME_LOOKAHEAD \ - $IMAGE_EXTENSION \ $LOADER_NAME \ $MAX_RESOLUTION \ $SHARE_INTRINSICS diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a66630338..11c7475db 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -65,7 +65,6 @@ jobs: DATASET_NAME=${{ matrix.config_dataset_info[1] }} CONFIG_NAME=${{ matrix.config_dataset_info[0] }} MAX_FRAME_LOOKAHEAD=${{ matrix.config_dataset_info[2] }} - IMAGE_EXTENSION=${{ matrix.config_dataset_info[3] }} LOADER_NAME=${{ matrix.config_dataset_info[5] }} MAX_RESOLUTION=${{ matrix.config_dataset_info[6] }} SHARE_INTRINSICS=${{ matrix.config_dataset_info[7] }} @@ -73,7 +72,6 @@ jobs: $DATASET_NAME \ $CONFIG_NAME \ $MAX_FRAME_LOOKAHEAD \ - $IMAGE_EXTENSION \ $LOADER_NAME \ $MAX_RESOLUTION \ $SHARE_INTRINSICS diff --git a/gtsfm/loader/loader_base.py b/gtsfm/loader/loader_base.py index 698258077..2041d9760 100644 --- a/gtsfm/loader/loader_base.py +++ b/gtsfm/loader/loader_base.py @@ -4,7 +4,6 @@ """ import abc -import glob import logging from typing import Dict, List, Optional, Tuple @@ -415,7 +414,7 @@ def get_images_with_exif(self, search_path: str) -> Tuple[List[str], int]: The number of all the images. ] """ - all_image_paths = glob.glob(search_path) + all_image_paths = io_utils.get_sorted_image_names_in_dir(search_path) num_all_imgs = len(all_image_paths) exif_image_paths = [] for single_img_path in all_image_paths: diff --git a/gtsfm/loader/olsson_loader.py b/gtsfm/loader/olsson_loader.py index a6d3b8b87..80ac6a678 100644 --- a/gtsfm/loader/olsson_loader.py +++ b/gtsfm/loader/olsson_loader.py @@ -3,7 +3,6 @@ Authors: John Lambert """ -import glob import os from pathlib import Path from typing import List, Optional @@ -16,8 +15,6 @@ from gtsfm.common.image import Image from gtsfm.loader.loader_base import LoaderBase -IMG_EXTENSIONS = ["png", "PNG", "jpg", "JPG"] - class OlssonLoader(LoaderBase): """Simple loader class that reads any of Carl Olsson's datasets from a folder on disk. @@ -60,13 +57,7 @@ def __init__( self._use_gt_extrinsics = use_gt_extrinsics self._max_frame_lookahead = max_frame_lookahead - self._image_paths = [] - for extension in IMG_EXTENSIONS: - search_path = os.path.join(folder, "images", f"*.{extension}") - self._image_paths.extend(glob.glob(search_path)) - - # sort the file names - self._image_paths.sort() + self._image_paths = io_utils.get_sorted_image_names_in_dir(os.path.join(folder, "images")) self._num_imgs = len(self._image_paths) if self._num_imgs == 0: diff --git a/gtsfm/loader/one_d_sfm_loader.py b/gtsfm/loader/one_d_sfm_loader.py index 81148b566..55388accf 100644 --- a/gtsfm/loader/one_d_sfm_loader.py +++ b/gtsfm/loader/one_d_sfm_loader.py @@ -3,7 +3,6 @@ Authors: Yanwei Du """ -import glob import os from pathlib import Path from typing import List, Optional @@ -36,7 +35,6 @@ class OneDSFMLoader(LoaderBase): def __init__( self, folder: str, - image_extension: str = "jpg", max_resolution: int = 640, enable_no_exif: bool = False, default_focal_length_factor: float = 1.2, @@ -45,7 +43,6 @@ def __init__( Args: folder: the base folder which contains image sequence. - image_extension: file extension for the image files. Defaults to 'jpg'. max_resolution: integer representing maximum length of image's short side, i.e. the smaller of the height/width of the image. e.g. for 1080p (1920 x 1080), max_resolution would be 1080. If the image resolution max(height, width) is @@ -59,10 +56,10 @@ def __init__( self._default_focal_length_factor = default_focal_length_factor # Fetch all the file names in /images folder. - search_path = os.path.join(folder, "images", f"*.{image_extension}") + search_path = os.path.join(folder, "images") if enable_no_exif: - self._image_paths = glob.glob(search_path) + self._image_paths = io_utils.get_sorted_image_names_in_dir(search_path) else: (self._image_paths, num_all_imgs) = self.get_images_with_exif(search_path) logger.info("Read %d images with exif out of %d in total.", len(self._image_paths), num_all_imgs) diff --git a/gtsfm/runner/run_scene_optimizer_1dsfm.py b/gtsfm/runner/run_scene_optimizer_1dsfm.py index 6d3d138e5..8320e500c 100644 --- a/gtsfm/runner/run_scene_optimizer_1dsfm.py +++ b/gtsfm/runner/run_scene_optimizer_1dsfm.py @@ -20,7 +20,6 @@ class GtsfmRunnerOneDSFMLoader(GtsfmRunnerBase): def construct_argparser(self) -> argparse.ArgumentParser: parser = super(GtsfmRunnerOneDSFMLoader, self).construct_argparser() parser.add_argument("--dataset_root", type=str, default="", help="") - parser.add_argument("--image_extension", type=str, default="jpg", help="") parser.add_argument("--enable_no_exif", action="store_true", help="") parser.add_argument("--default_focal_length_factor", type=float, default=1.2, help="") return parser @@ -28,7 +27,6 @@ def construct_argparser(self) -> argparse.ArgumentParser: def construct_loader(self) -> LoaderBase: loader = OneDSFMLoader( folder=self.parsed_args.dataset_root, - image_extension=self.parsed_args.image_extension, enable_no_exif=self.parsed_args.enable_no_exif, default_focal_length_factor=self.parsed_args.default_focal_length_factor, ) diff --git a/gtsfm/utils/io.py b/gtsfm/utils/io.py index 5d664a91f..a4996f79f 100644 --- a/gtsfm/utils/io.py +++ b/gtsfm/utils/io.py @@ -2,6 +2,7 @@ Authors: Ayush Baid, John Lambert """ +import glob import os import pickle from bz2 import BZ2File @@ -33,6 +34,9 @@ logger = logger_utils.get_logger() +IMG_EXTENSIONS = ["png", "PNG", "jpg", "JPG"] + + def load_image(img_path: str) -> Image: """Load the image from disk. @@ -633,3 +637,20 @@ def read_point_cloud_from_ply(ply_fpath: str) -> Tuple[np.ndarray, np.ndarray]: """ pointcloud = open3d.io.read_point_cloud(ply_fpath) return open3d_vis_utils.convert_colored_open3d_point_cloud_to_numpy(pointcloud) + + +def get_sorted_image_names_in_dir(dir_path: str) -> List[str]: + """Finds all jpg and png images in directory and returns their names in sorted order. + + Args: + dir_path: Path to directory containing images. + + Returns: + image_paths: List of image names in sorted order. + """ + image_paths = [] + for extension in IMG_EXTENSIONS: + search_path = os.path.join(dir_path, f"*.{extension}") + image_paths.extend(glob.glob(search_path)) + + return sorted(image_paths) diff --git a/gtsfm/visualization/view_scene_olsson.py b/gtsfm/visualization/view_scene_olsson.py index 30bdffd21..9025b245a 100644 --- a/gtsfm/visualization/view_scene_olsson.py +++ b/gtsfm/visualization/view_scene_olsson.py @@ -43,7 +43,6 @@ def view_scene(args: argparse.Namespace) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser(description="Visualize Olsson dataset w/ Open3d.") parser.add_argument("--dataset_root", type=str, default=os.path.join(TEST_DATA_ROOT, "set1_lund_door"), help="") - parser.add_argument("--image_extension", type=str, default="JPG", help="") parser.add_argument( "--rendering_style", type=str, From 2007ba59138dd4f139481ad55e153c4110110c4e Mon Sep 17 00:00:00 2001 From: Hayk Stepanyan <54330853+stepanyanhayk@users.noreply.github.com> Date: Mon, 4 Sep 2023 22:19:27 -0400 Subject: [PATCH 30/30] DSF Track Estimation C++ (#618) * integrate c++ version * cleanup * organize imports * remove explicit imports from gtsam * add unittest for track estimation * improve comments * python black reformat * python black reformat * reformat python black * add side by side C++ DSF DA code * add unit test on C++ version * black reformat * measure runtime * remove unused import * fix typo * clean up comment ascii art * Switch to gtsam 4.2 instead of 4.2a8 * Use c++ based Dsf track estimator --------- --- environment_linux.yml | 2 +- environment_linux_cpuonly.yml | 2 +- environment_mac.yml | 2 +- .../cpp_dsf_tracks_estimator.py | 83 +++++++++++++++++++ .../data_association/dsf_tracks_estimator.py | 8 +- gtsfm/multi_view_optimizer.py | 4 +- .../test_cpp_dsf_tracks_estimator.py | 12 +++ .../test_dsf_tracks_estimator.py | 59 ++++++++++++- tests/utils/test_io_utils.py | 14 ++-- 9 files changed, 168 insertions(+), 18 deletions(-) create mode 100644 gtsfm/data_association/cpp_dsf_tracks_estimator.py create mode 100644 tests/data_association/test_cpp_dsf_tracks_estimator.py diff --git a/environment_linux.yml b/environment_linux.yml index c9227feb9..6c0f837bd 100644 --- a/environment_linux.yml +++ b/environment_linux.yml @@ -52,5 +52,5 @@ dependencies: - colour - pycolmap>=0.1.0 - trimesh[easy] - - gtsam==4.2a8 + - gtsam==4.2 - pydot diff --git a/environment_linux_cpuonly.yml b/environment_linux_cpuonly.yml index f9f7bc37f..9de9d997a 100644 --- a/environment_linux_cpuonly.yml +++ b/environment_linux_cpuonly.yml @@ -52,5 +52,5 @@ dependencies: - colour - pycolmap>=0.1.0 - trimesh[easy] - - gtsam==4.2a8 + - gtsam==4.2 - pydot diff --git a/environment_mac.yml b/environment_mac.yml index e1615694d..1d173f0c2 100644 --- a/environment_mac.yml +++ b/environment_mac.yml @@ -57,5 +57,5 @@ dependencies: - colour - trimesh[easy] - pycolmap>=0.1.0 - - gtsam==4.2a8 + - gtsam==4.2 - pydot diff --git a/gtsfm/data_association/cpp_dsf_tracks_estimator.py b/gtsfm/data_association/cpp_dsf_tracks_estimator.py new file mode 100644 index 000000000..a34fc7de3 --- /dev/null +++ b/gtsfm/data_association/cpp_dsf_tracks_estimator.py @@ -0,0 +1,83 @@ +"""Estimates tracks from feature correspondences using the Union-Find algorithm. + +The unique key for each measurement is a tuple of (Image ID, keypoint index for that image). + +References: +1. P. Moulon, P. Monasse. Unordered Feature Tracking Made Fast and Easy, 2012, HAL Archives. + https://hal-enpc.archives-ouvertes.fr/hal-00769267/file/moulon_monasse_featureTracking_CVMP12.pdf + +Authors: Ayush Baid, Sushmita Warrier, John Lambert, Travis Driver +""" + +import time +from typing import Dict, List, Tuple + +import gtsam +import numpy as np + +import gtsfm.utils.logger as logger_utils +from gtsfm.common.keypoints import Keypoints +from gtsfm.common.sfm_track import SfmMeasurement, SfmTrack2d +from gtsfm.data_association.tracks_estimator_base import TracksEstimatorBase + +logger = logger_utils.get_logger() + + +class CppDsfTracksEstimator(TracksEstimatorBase): + """Estimates tracks using a disjoint-set forest (DSF), with core logic implemented in C++.""" + + def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: List[Keypoints]) -> List[SfmTrack2d]: + """Estimate tracks from feature correspondences. + + Compare to GTSAM's `python/gtsam/tests/test_DsfTrackGenerator.py`. + + Creates a disjoint-set forest (DSF) and 2d tracks from pairwise matches. We create a singleton for union-find + set elements from camera index of a detection and the index of that detection in that camera's keypoint list, + i.e. (i,k). + + Args: + matches_dict: Dict of pairwise matches of type: + key: indices for the matched pair of images + val: feature indices, as array of Nx2 shape; N being number of features. A row is (feature_idx1, + feature_idx2). + keypoints_list: List of keypoints for each image. + + Returns: + List of all valid SfmTrack2d generated by the matches. + """ + start_time = time.time() + # Check to ensure dimensions of coordinates are correct. + dims_valid = all([kps.coordinates.ndim == 2 for kps in keypoints_list]) + if not dims_valid: + raise Exception("Dimensions for Keypoint coordinates incorrect. Array needs to be 2D") + + # For each image pair (i1,i2), we provide a (K,2) matrix of corresponding keypoint indices (k1,k2). + # (Converts python dict into gtsam.MatchIndicesMap.) + matches_map = gtsam.MatchIndicesMap() + for (i1, i2), corr_idxs in matches_dict.items(): + matches_map[gtsam.IndexPair(i1, i2)] = corr_idxs + + # Convert gtsfm Keypoints into gtsam Keypoints. + keypoints_vector = gtsam.KeypointsVector() + for keypoint in keypoints_list: + keypoints_vector.append(gtsam.gtsfm.Keypoints(keypoint.coordinates)) + + tracks = gtsam.gtsfm.tracksFromPairwiseMatches( + matches_map, + keypoints_vector, + verbose=True, + ) + + track_2d_list = [] + for track in tracks: + # Converting gtsam SfmTrack2d into gtsfm SfmTrack2d. + # Note: `indexVector` contains the camera indices for each of the (K,) 2d (u,v) measurements. + # `measurementMatrix` contains the measurements as a 2D matrix (K,2). + track_ = SfmTrack2d( + [SfmMeasurement(i, uv) for (i, uv) in zip(track.indexVector(), track.measurementMatrix())] + ) + track_2d_list.append(track_) + + duration = time.time() - start_time + logger.info("CppDsfTracksEstimator took %.2f sec. to estimate %d tracks.", duration, len(track_2d_list)) + return track_2d_list diff --git a/gtsfm/data_association/dsf_tracks_estimator.py b/gtsfm/data_association/dsf_tracks_estimator.py index 38e4c5bd2..ef47b7fdd 100644 --- a/gtsfm/data_association/dsf_tracks_estimator.py +++ b/gtsfm/data_association/dsf_tracks_estimator.py @@ -8,6 +8,7 @@ Authors: Ayush Baid, Sushmita Warrier, John Lambert, Travis Driver """ +import time from typing import Dict, List, Tuple import gtsam @@ -22,7 +23,7 @@ class DsfTracksEstimator(TracksEstimatorBase): - """Estimates tracks using a disjoint-set forest (DSF).""" + """Estimates tracks using a disjoint-set forest (DSF), with core logic implemented in Python.""" def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: List[Keypoints]) -> List[SfmTrack2d]: """Estimate tracks from feature correspondences. @@ -41,7 +42,8 @@ def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: L Returns: list of all valid SfmTrack2d generated by the matches. """ - # check to ensure dimensions of coordinates are correct + start_time = time.time() + # Check to ensure dimensions of coordinates are correct dims_valid = all([kps.coordinates.ndim == 2 for kps in keypoints_list if kps is not None]) if not dims_valid: raise Exception("Dimensions for Keypoint coordinates incorrect. Array needs to be 2D") @@ -86,4 +88,6 @@ def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: L logger.info( f"DSF Union-Find: {erroneous_track_pct:.2f}% of tracks discarded from multiple obs. in a single image." ) + duration = time.time() - start_time + logger.info("DsfTracksEstimator took %.2f sec. to estimate %d tracks.", duration, len(track_2d_list)) return track_2d_list diff --git a/gtsfm/multi_view_optimizer.py b/gtsfm/multi_view_optimizer.py index 92aafd8d0..f8924df93 100644 --- a/gtsfm/multi_view_optimizer.py +++ b/gtsfm/multi_view_optimizer.py @@ -22,7 +22,7 @@ from gtsfm.data_association.data_assoc import DataAssociation from gtsfm.evaluation.metrics import GtsfmMetricsGroup from gtsfm.view_graph_estimator.view_graph_estimator_base import ViewGraphEstimatorBase -from gtsfm.data_association.dsf_tracks_estimator import DsfTracksEstimator +from gtsfm.data_association.cpp_dsf_tracks_estimator import CppDsfTracksEstimator from gtsfm.common.two_view_estimation_report import TwoViewEstimationReport @@ -193,5 +193,5 @@ def init_cameras( def get_2d_tracks( corr_idxs_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: List[Keypoints] ) -> List[SfmTrack2d]: - tracks_estimator = DsfTracksEstimator() + tracks_estimator = CppDsfTracksEstimator() return tracks_estimator.run(corr_idxs_dict, keypoints_list) diff --git a/tests/data_association/test_cpp_dsf_tracks_estimator.py b/tests/data_association/test_cpp_dsf_tracks_estimator.py new file mode 100644 index 000000000..30a4a5c04 --- /dev/null +++ b/tests/data_association/test_cpp_dsf_tracks_estimator.py @@ -0,0 +1,12 @@ +"""Unit tests for the CppDsfTracksEstimator class.""" + + +from gtsfm.data_association.cpp_dsf_tracks_estimator import CppDsfTracksEstimator +from tests.data_association.test_dsf_tracks_estimator import TestDsfTracksEstimator + + +class TestCppDsfTracksEstimator(TestDsfTracksEstimator): + """ """ + + def setUp(self): + self.estimator = CppDsfTracksEstimator() diff --git a/tests/data_association/test_dsf_tracks_estimator.py b/tests/data_association/test_dsf_tracks_estimator.py index 12a3651b8..18a9fffae 100644 --- a/tests/data_association/test_dsf_tracks_estimator.py +++ b/tests/data_association/test_dsf_tracks_estimator.py @@ -60,6 +60,57 @@ def test_generate_tracks_from_pairwise_matches_nontransitive( print(tracks) self.assertEqual(len(tracks), 0, "Tracks not filtered correctly") + def test_track_generation(self) -> None: + """Ensures that DSF generates three tracks from measurements in 3 images (H=200,W=400). + + Analogous to test found at github.com/borglab/gtsam/blob/develop/python/gtsam/tests/test_DsfTrackGenerator.py + """ + kps_i0 = Keypoints(np.array([[10.0, 20], [30, 40]])) + kps_i1 = Keypoints(np.array([[50.0, 60], [70, 80], [90, 100]])) + kps_i2 = Keypoints(np.array([[110.0, 120], [130, 140]])) + + keypoints_list = [] + keypoints_list.append(kps_i0) + keypoints_list.append(kps_i1) + keypoints_list.append(kps_i2) + + # For each image pair (i1,i2), we provide a (K,2) matrix of corresponding keypoint indices (k1,k2). + matches_dict = {} + matches_dict[(0, 1)] = np.array([[0, 0], [1, 1]]) + matches_dict[(1, 2)] = np.array([[2, 0], [1, 1]]) + + tracks = self.estimator.run( + matches_dict, + keypoints_list, + ) + assert len(tracks) == 3 + + # Verify track 0. + track0 = tracks[0] + assert track0.number_measurements() == 2 + np.testing.assert_allclose(track0.measurement(0).uv, np.array([10, 20])) + np.testing.assert_allclose(track0.measurement(1).uv, np.array([50, 60])) + assert track0.measurement(0).i == 0 + assert track0.measurement(1).i == 1 + + # Verify track 1. + track1 = tracks[1] + assert track1.number_measurements() == 3 + np.testing.assert_allclose(track1.measurement(0).uv, np.array([30, 40])) + np.testing.assert_allclose(track1.measurement(1).uv, np.array([70, 80])) + np.testing.assert_allclose(track1.measurement(2).uv, np.array([130, 140])) + assert track1.measurement(0).i == 0 + assert track1.measurement(1).i == 1 + assert track1.measurement(2).i == 2 + + # Verify track 2. + track2 = tracks[2] + assert track2.number_measurements() == 2 + np.testing.assert_allclose(track2.measurement(0).uv, np.array([90, 100])) + np.testing.assert_allclose(track2.measurement(1).uv, np.array([110, 120])) + assert track2.measurement(0).i == 1 + assert track2.measurement(1).i == 2 + def get_dummy_keypoints_list() -> List[Keypoints]: """ """ @@ -124,10 +175,10 @@ def get_dummy_matches() -> Dict[Tuple[int, int], np.ndarray]: def get_nontransitive_matches() -> Dict[Tuple[int, int], np.ndarray]: """Set up correspondences for each (i1,i2) pair that violates transitivity. - (i=0, k=0) - | \ - | \ - (i=1, k=2)--(i=2,k=3)--(i=3, k=4)--(i=1, k=1) + (i=0, k=0) (i=0, k=1) + | \\ | + | \\ | + (i=1, k=2)--(i=2,k=3)--(i=3, k=4) Transitivity is violated due to the match between frames 0 and 3. """ diff --git a/tests/utils/test_io_utils.py b/tests/utils/test_io_utils.py index 896aedee6..e0dd42eb5 100644 --- a/tests/utils/test_io_utils.py +++ b/tests/utils/test_io_utils.py @@ -235,17 +235,17 @@ def test_json_roundtrip(self) -> None: def test_sort_image_filenames_lexigraphically(self) -> None: """Tests that 5 image-camera pose pairs are sorted jointly according to file name.""" wTi_list = [ - Pose3(Rot3(), np.array([0,0,34])), - Pose3(Rot3(), np.array([0,0,35])), - Pose3(Rot3(), np.array([0,0,36])), - Pose3(Rot3(), np.array([0,0,28])), - Pose3(Rot3(), np.array([0,0,37])) + Pose3(Rot3(), np.array([0, 0, 34])), + Pose3(Rot3(), np.array([0, 0, 35])), + Pose3(Rot3(), np.array([0, 0, 36])), + Pose3(Rot3(), np.array([0, 0, 28])), + Pose3(Rot3(), np.array([0, 0, 37])), ] - img_fnames = ['P1180334.JPG', 'P1180335.JPG', 'P1180336.JPG', 'P1180328.JPG', 'P1180337.JPG'] + img_fnames = ["P1180334.JPG", "P1180335.JPG", "P1180336.JPG", "P1180328.JPG", "P1180337.JPG"] wTi_list_sorted, img_fnames_sorted = io_utils.sort_image_filenames_lexigraphically(wTi_list, img_fnames) - expected_img_fnames_sorted = ['P1180328.JPG', 'P1180334.JPG', 'P1180335.JPG', 'P1180336.JPG', 'P1180337.JPG'] + expected_img_fnames_sorted = ["P1180328.JPG", "P1180334.JPG", "P1180335.JPG", "P1180336.JPG", "P1180337.JPG"] self.assertEqual(img_fnames_sorted, expected_img_fnames_sorted) self.assertEqual(wTi_list_sorted[0].translation()[2], 28)