refactor: Move MLAmbiguitySolver to Core (#3272)

This PR moves the MLAmbiguitySolver to Core, which will allow us to test it more easily with ATLAS in the future. It also removes the DBScan version of this algorithm, as it was shown to be far less effective.

## Summary by CodeRabbit

- **New Features**
  - Enhanced performance monitoring capabilities with the addition of machine learning-based metrics.
  - Integrated a new machine learning function for ambiguity resolution in track finding workflows.
  - Introduced a new `TrackTruthMatcher` algorithm to improve track validation.
- **Bug Fixes**
  - Corrected naming conventions for the `CsvSpacePointWriter` class and its methods.
- **Chores**
  - Removed deprecated configurations and algorithms to streamline the codebase.
  - Updated header inclusions to reflect new naming conventions and class structures.
acts-project · Dec 6, 2024 · f57e260 · f57e260
1 parent a646e06
commit f57e260
Show file tree

Hide file tree

Showing 24 changed files with 339 additions and 441 deletions.
diff --git a/CI/physmon/phys_perf_mon.sh b/CI/physmon/phys_perf_mon.sh
@@ -265,6 +265,15 @@ function trackfinding() {
             $path/performance_finding_ckf_ambi.html \
             $path/performance_finding_ckf_ambi
     fi
+
+    if [ -f $refdir/$path/performance_finding_ckf_ml_solver.root ]; then
+        run_histcmp \
+            $outdir/data/$path/performance_finding_ckf_ml_solver.root \
+            $refdir/$path/performance_finding_ckf_ml_solver.root \
+            "ML Ambisolver | ${name}" \
+            $path/performance_finding_ckf_ml_solver.html \
+            $path/performance_finding_ckf_ml_solver
+    fi
 }
 
 function vertexing() {

diff --git a/CI/physmon/reference/trackfinding_ttbar_pu200/performance_finding_ckf_ml_solver.root b/CI/physmon/reference/trackfinding_ttbar_pu200/performance_finding_ckf_ml_solver.root
diff --git a/CI/physmon/reference/trackfinding_ttbar_pu200/performance_fitting_ckf_ml_solver.root b/CI/physmon/reference/trackfinding_ttbar_pu200/performance_fitting_ckf_ml_solver.root
diff --git a/CI/physmon/workflows/physmon_trackfinding_ttbar_pu200.py b/CI/physmon/workflows/physmon_trackfinding_ttbar_pu200.py
@@ -19,7 +19,9 @@
     CkfConfig,
     addCKFTracks,
     addAmbiguityResolution,
+    addAmbiguityResolutionML,
     AmbiguityResolutionConfig,
+    AmbiguityResolutionMLConfig,
     addVertexFitting,
     VertexFinder,
     TrackSelectorConfig,
@@ -134,13 +136,25 @@
         outputDirRoot=tp,
     )
 
+    addAmbiguityResolutionML(
+        s,
+        AmbiguityResolutionMLConfig(
+            maximumSharedHits=3, maximumIterations=1000000, nMeasurementsMin=6
+        ),
+        tracks="ckf_tracks",
+        outputDirRoot=tp,
+        onnxModelFile=Path(__file__).resolve().parent.parent.parent.parent
+        / "thirdparty/OpenDataDetector/data/duplicateClassifier.onnx",
+    )
+
     addAmbiguityResolution(
         s,
         AmbiguityResolutionConfig(
             maximumSharedHits=3,
             maximumIterations=100000,
             nMeasurementsMin=6,
         ),
+        tracks="ckf_tracks",
         outputDirRoot=tp,
     )
 
@@ -187,6 +201,17 @@
         tp / "performance_fitting_ambi.root",
         tp / "performance_fitting_ckf_ambi.root",
     )
+
+    shutil.move(
+        tp / "performance_finding_ambiML.root",
+        tp / "performance_finding_ckf_ml_solver.root",
+    )
+
+    shutil.move(
+        tp / "performance_fitting_ambiML.root",
+        tp / "performance_fitting_ckf_ml_solver.root",
+    )
+
     for vertexing in ["amvf_gauss_notime", "amvf_grid_time"]:
         shutil.move(
             tp / f"{vertexing}/performance_vertexing.root",
@@ -200,6 +225,8 @@
         "performance_fitting_ckf.root",
         "performance_finding_ckf_ambi.root",
         "performance_fitting_ckf_ambi.root",
+        "performance_finding_ckf_ml_solver.root",
+        "performance_fitting_ckf_ml_solver.root",
         "performance_vertexing_amvf_gauss_notime.root",
         "performance_vertexing_amvf_grid_time.root",
     ]:

diff --git a/Core/include/Acts/AmbiguityResolution/AmbiguityNetworkConcept.hpp b/Core/include/Acts/AmbiguityResolution/AmbiguityNetworkConcept.hpp
@@ -0,0 +1,51 @@
+// This file is part of the ACTS project.
+//
+// Copyright (C) 2016 CERN for the benefit of the ACTS project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/EventData/TrackContainer.hpp"
+#include "Acts/EventData/TrackContainerFrontendConcept.hpp"
+#include "Acts/EventData/VectorMultiTrajectory.hpp"
+#include "Acts/EventData/VectorTrackContainer.hpp"
+#include "Acts/Utilities/Concepts.hpp"
+
+namespace Acts {
+
+/// @brief Concept for the ambiguity network used in the ambiguity resolution
+///
+/// The ambiguity network corresponds to the AmbiguityTrackClassifier found in
+/// the Onnx plugin. It is used to score the tracks and select the best ones.
+///
+/// The constructor of the Ambiguity Solver network should take a string as
+/// input, corresponding to the path of the ONNX model.
+/// The implementation of the Ambiguity Solver network should have two methods:
+/// - inferScores: takes clusters (a list of track IDs associated with a
+///                cluster ID) and the track container and returns an
+///                outputTensor (list of scores for each track in the clusters).
+/// - trackSelection: takes clusters and the output tensor from the inferScores
+///                   method and returns the list of track IDs to keep.
+///
+/// NOTE(review): the requirements are checked with a non-const network and a
+/// non-const track container; confirm this matches how the network is called.
+///
+/// @tparam network_t the type of the network
+template <typename network_t>
+concept AmbiguityNetworkConcept = requires(
+    TrackContainer<VectorTrackContainer, VectorMultiTrajectory,
+                   detail::ValueHolder> &tracks,
+    std::unordered_map<std::size_t, std::vector<std::size_t>> &clusters,
+    std::vector<std::vector<float>> &outputTensor, const char *modelPath,
+    network_t &n) {
+  // The network must be constructible from the path of the model file
+  { network_t(modelPath) } -> std::same_as<network_t>;
+
+  // Scoring: one vector of scores per call, returned by value
+  {
+    n.inferScores(clusters, tracks)
+  } -> std::same_as<std::vector<std::vector<float>>>;
+  // Selection: returns the IDs of the tracks to keep
+  {
+    n.trackSelection(clusters, outputTensor)
+  } -> std::same_as<std::vector<std::size_t>>;
+};
+
+}  // namespace Acts
diff --git a/Core/include/Acts/AmbiguityResolution/AmbiguityResolutionML.hpp b/Core/include/Acts/AmbiguityResolution/AmbiguityResolutionML.hpp
@@ -0,0 +1,136 @@
+// This file is part of the ACTS project.
+//
+// Copyright (C) 2016 CERN for the benefit of the ACTS project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/AmbiguityResolution/AmbiguityNetworkConcept.hpp"
+#include "Acts/Definitions/Units.hpp"
+#include "Acts/EventData/TrackContainer.hpp"
+#include "Acts/Utilities/Delegate.hpp"
+#include "Acts/Utilities/Logger.hpp"
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace Acts {
+
+/// Generic implementation of the machine learning ambiguity resolution.
+///
+/// Contains the data preparation step (mapping the tracks to their
+/// measurements) and forwards the scoring and the selection of the tracks to
+/// the network.
+///
+/// @tparam AmbiguityNetwork is the network implementation, it has to satisfy
+///         the AmbiguityNetworkConcept
+template <AmbiguityNetworkConcept AmbiguityNetwork>
+class AmbiguityResolutionML {
+ public:
+  struct Config {
+    /// Path to the model file for the duplicate neural network
+    std::string inputDuplicateNN = "";
+    /// Minimum number of measurements to form a track.
+    std::size_t nMeasurementsMin = 7;
+  };
+  /// Construct the ambiguity resolution algorithm.
+  ///
+  /// The network is constructed from the model path stored in the
+  /// configuration.
+  ///
+  /// @param cfg is the algorithm configuration
+  /// @param logger is the logging instance
+  AmbiguityResolutionML(const Config& cfg,
+                        std::unique_ptr<const Logger> logger = getDefaultLogger(
+                            "AmbiguityResolutionML", Logging::INFO))
+      : m_cfg{cfg},
+        // m_cfg is declared before m_duplicateClassifier, so it is already
+        // initialised when its model path is read here
+        m_duplicateClassifier(m_cfg.inputDuplicateNN.c_str()),
+        m_logger{std::move(logger)} {}
+
+  /// Associate the hits to the tracks
+  ///
+  /// This algorithm performs the mapping of hit IDs to track IDs. Our final
+  /// goal is to loop over all the tracks (and their associated hits) by order
+  /// of decreasing number of hits. For this we use a multimap where the key is
+  /// the number of hits: the multimap keeps its entries sorted by increasing
+  /// key, so iterating over it in reverse gives the wanted order.
+  ///
+  /// The track ID stored in the map is the position of the track in the
+  /// iteration over @p tracks. Tracks rejected by the nMeasurementsMin cut are
+  /// not stored, but they are still counted.
+  ///
+  /// @param tracks is the input track container
+  /// @param sourceLinkHash is the hash function for the source link, used to associate hits to tracks
+  /// @param sourceLinkEquality is the equality function for the source link, used to associate hits to tracks
+  /// @return an ordered list containing pairs of track ID and associated measurement ID
+  template <TrackContainerFrontend track_container_t,
+            typename source_link_hash_t, typename source_link_equality_t>
+  std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
+  mapTrackHits(const track_container_t& tracks,
+               const source_link_hash_t& sourceLinkHash,
+               const source_link_equality_t& sourceLinkEquality) const {
+    // A map to store (and generate) the measurement index for each source link
+    auto measurementIndexMap =
+        std::unordered_map<SourceLink, std::size_t, source_link_hash_t,
+                           source_link_equality_t>(0, sourceLinkHash,
+                                                   sourceLinkEquality);
+
+    // A map to store the track Id and their associated measurements ID, a
+    // multimap is used to automatically sort the tracks by the number of
+    // measurements
+    std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
+        trackMap;
+    // Position of the next track in the iteration. It is advanced for every
+    // track, including the rejected ones, so that the stored ID keeps
+    // referring to the same track of `tracks` in the later steps.
+    std::size_t trackIndex = 0;
+    // Buffer reused for every track, it is copied into the map entry
+    std::vector<std::size_t> measurements;
+    // Loop over all the trajectories in the events
+    for (const auto& track : tracks) {
+      const std::size_t currentIndex = trackIndex++;
+      // Kick out tracks that do not fulfill our initial requirements
+      if (track.nMeasurements() < m_cfg.nMeasurementsMin) {
+        continue;
+      }
+      measurements.clear();
+      for (auto ts : track.trackStatesReversed()) {
+        if (ts.typeFlags().test(Acts::TrackStateFlag::MeasurementFlag)) {
+          SourceLink sourceLink = ts.getUncalibratedSourceLink();
+          // assign a new measurement index if the source link was not seen yet
+          auto emplace = measurementIndexMap.try_emplace(
+              sourceLink, measurementIndexMap.size());
+          measurements.push_back(emplace.first->second);
+        }
+      }
+      trackMap.emplace(track.nMeasurements(),
+                       std::make_pair(currentIndex, measurements));
+    }
+    return trackMap;
+  }
+
+  /// Select the tracks associated with each cluster
+  ///
+  /// In this algorithm we call the neural network to score the tracks
+  /// (inferScores) and then let it select the tracks to keep from these
+  /// scores (trackSelection).
+  ///
+  /// @param clusters is a map of clusters, each cluster corresponds to a vector of track IDs
+  /// @param tracks is the input track container
+  /// @return a vector of track IDs corresponding to the good tracks
+  template <TrackContainerFrontend track_container_t>
+  std::vector<std::size_t> solveAmbiguity(
+      std::unordered_map<std::size_t, std::vector<std::size_t>>& clusters,
+      const track_container_t& tracks) const {
+    std::vector<std::vector<float>> outputTensor =
+        m_duplicateClassifier.inferScores(clusters, tracks);
+    std::vector<std::size_t> goodTracks =
+        m_duplicateClassifier.trackSelection(clusters, outputTensor);
+
+    return goodTracks;
+  }
+
+ private:
+  // Configuration
+  Config m_cfg;
+
+  // The neural network for duplicate classification, the network
+  // implementation is chosen with the AmbiguityNetwork template parameter
+  AmbiguityNetwork m_duplicateClassifier;
+
+  /// Logging instance
+  std::unique_ptr<const Logger> m_logger = nullptr;
+
+  /// Private access to logging instance
+  const Logger& logger() const { return *m_logger; }
+};
+
+}  // namespace Acts
diff --git a/Core/include/Acts/TrackFinding/detail/AmbiguityTrackClustering.hpp b/Core/include/Acts/TrackFinding/detail/AmbiguityTrackClustering.hpp
@@ -15,7 +15,15 @@
 
 namespace Acts::detail {
 
-/// Clusterise tracks based on shared hits
+/// Cluster tracks based on shared hits.
+///
+/// In this algorithm we will loop through all the tracks by decreasing number
+/// of measurements. Clusters are created when a new track is encountered that
+/// doesn't share hits with the leading track of a previous cluster (with the
+/// leading track defined as the track that led to the cluster creation). If a
+/// track shares hits with the leading track of a cluster, it is added to that
+/// cluster. If a track shares hits with multiple clusters, it is associated to
+/// the cluster whose leading track has the most hits.
 ///
 /// @param trackMap : Multimap storing pair of track ID and vector of measurement ID. The keys are the number of measurement and are just there to facilitate the ordering.
 /// @return an unordered map representing the clusters, the keys the ID of the primary track of each cluster and the store a vector of track IDs.

diff --git a/Core/src/TrackFinding/AmbiguityTrackClustering.cpp b/Core/src/TrackFinding/AmbiguityTrackClustering.cpp
@@ -21,8 +21,9 @@ Acts::detail::clusterDuplicateTracks(
   // different clusters.
   std::unordered_map<std::size_t, std::size_t> hitToTrack;
 
-  // Loop over all the tracks
-  for (const auto& [_, trackValue] : trackMap) {
+  // Loop backward over all the tracks
+  for (auto track = trackMap.rbegin(); track != trackMap.rend(); ++track) {
+    const auto& trackValue = track->second;
     std::vector<std::size_t> hits = trackValue.second;
     auto matchedTrack = hitToTrack.end();
     // Loop over all the hits in the track

diff --git a/Examples/Algorithms/TrackFindingML/CMakeLists.txt b/Examples/Algorithms/TrackFindingML/CMakeLists.txt
@@ -1,7 +1,5 @@
 set(SOURCES
-    src/AmbiguityResolutionML.cpp
     src/AmbiguityResolutionMLAlgorithm.cpp
-    src/AmbiguityResolutionMLDBScanAlgorithm.cpp
     src/SeedFilterMLAlgorithm.cpp
 )
 

diff --git a/...gorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp b/...gorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp