refactor: Cluster features for GNN pipeline (#3356)

This refactors the way (cluster)features for the GNN pipeline are computed. Also extends the `Examples::Cluster` to support a global position which is needed by athena data (Updates to that will be merged later).
acts-project · Jul 11, 2024 · b5069e1 · b5069e1
1 parent fa78ed3
commit b5069e1
Show file tree

Hide file tree

Showing 4 changed files with 143 additions and 62 deletions.
diff --git a/...kFindingExaTrkX/include/ActsExamples/TrackFindingExaTrkX/TrackFindingAlgorithmExaTrkX.hpp b/...kFindingExaTrkX/include/ActsExamples/TrackFindingExaTrkX/TrackFindingAlgorithmExaTrkX.hpp
@@ -31,16 +31,31 @@ namespace ActsExamples {
 
 class TrackFindingAlgorithmExaTrkX final : public IAlgorithm {
  public:
+  enum class NodeFeature {
+    eR,
+    ePhi,
+    eX,
+    eY,
+    eZ,
+    eEta,
+    eCellCount,
+    eCellSum,
+    eClusterX,
+    eClusterY,
+    eCluster1R,
+    eCluster2R,
+    eCluster1Phi,
+    eCluster2Phi,
+    eCluster1Z,
+    eCluster2Z,
+    eCluster1Eta,
+    eCluster2Eta,
+  };
+
   struct Config {
     /// Input spacepoints collection.
     std::string inputSpacePoints;
-
-    /// Input cluster information (Optional). If given, the following features
-    /// are added:
-    /// * cell count
-    /// * sum cell activations
-    /// * cluster size in local x
-    /// * cluster size in local y
+    /// Input cluster information (Optional).
     std::string inputClusters;
 
     /// Input simhits (Optional).
@@ -62,14 +77,12 @@ class TrackFindingAlgorithmExaTrkX final : public IAlgorithm {
 
     std::shared_ptr<Acts::TrackBuildingBase> trackBuilder;
 
-    /// Scaling of the input features
-    float rScale = 1.f;
-    float phiScale = 1.f;
-    float zScale = 1.f;
-    float cellCountScale = 1.f;
-    float cellSumScale = 1.f;
-    float clusterXScale = 1.f;
-    float clusterYScale = 1.f;
+    /// Node features
+    std::vector<NodeFeature> nodeFeatures = {NodeFeature::eR, NodeFeature::ePhi,
+                                             NodeFeature::eZ};
+
+    /// Feature scales
+    std::vector<float> featureScales = {1.f, 1.f, 1.f};
 
     /// Remove track candidates with 2 or less hits
     bool filterShortTracks = false;

diff --git a/Examples/Algorithms/TrackFindingExaTrkX/src/TrackFindingAlgorithmExaTrkX.cpp b/Examples/Algorithms/TrackFindingExaTrkX/src/TrackFindingAlgorithmExaTrkX.cpp
@@ -119,6 +119,14 @@ class ExamplesEdmHook : public Acts::ExaTrkXHook {
   }
 };
 
+// TODO do we have these function in the repo somewhere?
+float theta(float r, float z) {
+  return std::atan2(r, z);
+}
+float eta(float r, float z) {
+  return -std::log(std::tan(0.5 * theta(r, z)));
+}
+
 }  // namespace
 
 ActsExamples::TrackFindingAlgorithmExaTrkX::TrackFindingAlgorithmExaTrkX(
@@ -166,18 +174,29 @@ ActsExamples::TrackFindingAlgorithmExaTrkX::TrackFindingAlgorithmExaTrkX(
   m_timing.classifierTimes.resize(
       m_cfg.edgeClassifiers.size(),
       decltype(m_timing.classifierTimes)::value_type{0.f});
+
+  // Check if we want cluster features but do not have them
+  const static std::array clFeatures = {
+      NodeFeature::eClusterX, NodeFeature::eClusterY,  NodeFeature::eCellCount,
+      NodeFeature::eCellSum,  NodeFeature::eCluster1R, NodeFeature::eCluster2R};
+
+  auto wantClFeatures = std::any_of(
+      m_cfg.nodeFeatures.begin(), m_cfg.nodeFeatures.end(), [&](const auto& f) {
+        return std::find(clFeatures.begin(), clFeatures.end(), f) !=
+               clFeatures.end();
+      });
+
+  if (wantClFeatures && !m_inputClusters.isInitialized()) {
+    throw std::invalid_argument("Cluster features requested, but not provided");
+  }
+
+  if (m_cfg.nodeFeatures.size() != m_cfg.featureScales.size()) {
+    throw std::invalid_argument(
+        "Number of features mismatches number of scale parameters.");
+  }
 }
 
 /// Allow access to features with nice names
-enum feat : std::size_t {
-  eR = 0,
-  ePhi,
-  eZ,
-  eCellCount,
-  eCellSum,
-  eClusterX,
-  eClusterY
-};
 
 ActsExamples::ProcessCode ActsExamples::TrackFindingAlgorithmExaTrkX::execute(
     const ActsExamples::AlgorithmContext& ctx) const {
@@ -200,54 +219,70 @@ ActsExamples::ProcessCode ActsExamples::TrackFindingAlgorithmExaTrkX::execute(
   // Convert Input data to a list of size [num_measurements x
   // measurement_features]
   const std::size_t numSpacepoints = spacepoints.size();
-  const std::size_t numFeatures = clusters ? 7 : 3;
-  ACTS_INFO("Received " << numSpacepoints << " spacepoints");
+  const std::size_t numFeatures = m_cfg.nodeFeatures.size();
+  ACTS_DEBUG("Received " << numSpacepoints << " spacepoints");
+  ACTS_DEBUG("Construct " << numFeatures << " node features");
 
   std::vector<float> features(numSpacepoints * numFeatures);
   std::vector<int> spacepointIDs;
 
   spacepointIDs.reserve(spacepoints.size());
 
-  double sumCells = 0.0;
-  double sumActivation = 0.0;
+  for (auto isp = 0ul; isp < numSpacepoints; ++isp) {
+    const auto& sp = spacepoints[isp];
 
-  for (auto i = 0ul; i < numSpacepoints; ++i) {
-    const auto& sp = spacepoints[i];
+    // For now just take the first index since does require one single index
+    // per spacepoint
+    // TODO does it work for the module map construction to use only the first
+    // sp?
+    const auto& sl1 = sp.sourceLinks()[0].template get<IndexSourceLink>();
+
+    // TODO this makes it a bit useless, refactor so we do not need to pass this
+    // to the pipeline
+    spacepointIDs.push_back(isp);
+
+    // This should be fine, because check in constructor
+    Cluster* cl1 = clusters ? &clusters->at(sl1.index()) : nullptr;
+    Cluster* cl2 = cl1;
+
+    if (sp.sourceLinks().size() == 2) {
+      const auto& sl2 = sp.sourceLinks()[1].template get<IndexSourceLink>();
+      cl2 = clusters ? &clusters->at(sl2.index()) : nullptr;
+    }
 
     // I would prefer to use a std::span or boost::span here once available
-    float* featurePtr = features.data() + i * numFeatures;
+    float* f = features.data() + isp * numFeatures;
+
+    using NF = NodeFeature;
+
+    for (auto ift = 0ul; ift < numFeatures; ++ift) {
+      // clang-format off
+      switch(m_cfg.nodeFeatures[ift]) {
+        break; case NF::eR:           f[ift] = std::hypot(sp.x(), sp.y());
+        break; case NF::ePhi:         f[ift] = std::atan2(sp.y(), sp.x());
+        break; case NF::eZ:           f[ift] = sp.z();
+        break; case NF::eX:           f[ift] = sp.x();
+        break; case NF::eY:           f[ift] = sp.y();
+        break; case NF::eEta:         f[ift] = eta(std::hypot(sp.x(), sp.y()), sp.z());
+        break; case NF::eClusterX:    f[ift] = cl1->sizeLoc0;
+        break; case NF::eClusterY:    f[ift] = cl1->sizeLoc1;
+        break; case NF::eCellSum:     f[ift] = cl1->sumActivations();
+        break; case NF::eCellCount:   f[ift] = cl1->channels.size();
+        break; case NF::eCluster1R:   f[ift] = std::hypot(cl1->globalPosition[Acts::ePos0], cl1->globalPosition[Acts::ePos1]);
+        break; case NF::eCluster2R:   f[ift] = std::hypot(cl2->globalPosition[Acts::ePos0], cl2->globalPosition[Acts::ePos1]);
+        break; case NF::eCluster1Phi: f[ift] = std::atan2(cl1->globalPosition[Acts::ePos1], cl1->globalPosition[Acts::ePos0]);
+        break; case NF::eCluster2Phi: f[ift] = std::atan2(cl2->globalPosition[Acts::ePos1], cl2->globalPosition[Acts::ePos0]);
+        break; case NF::eCluster1Z:   f[ift] = cl1->globalPosition[Acts::ePos2];
+        break; case NF::eCluster2Z:   f[ift] = cl2->globalPosition[Acts::ePos2];
+        break; case NF::eCluster1Eta: f[ift] = eta(std::hypot(cl1->globalPosition[Acts::ePos0], cl1->globalPosition[Acts::ePos1]), cl1->globalPosition[Acts::ePos2]);
+        break; case NF::eCluster2Eta: f[ift] = eta(std::hypot(cl2->globalPosition[Acts::ePos0], cl2->globalPosition[Acts::ePos1]), cl2->globalPosition[Acts::ePos2]);
+      }
+      // clang-format on
 
-    // For now just take the first index since does require one single index
-    // per spacepoint
-    const auto& sl = sp.sourceLinks()[0].template get<IndexSourceLink>();
-    spacepointIDs.push_back(sl.index());
-
-    featurePtr[eR] = std::hypot(sp.x(), sp.y()) / m_cfg.rScale;
-    featurePtr[ePhi] = std::atan2(sp.y(), sp.x()) / m_cfg.phiScale;
-    featurePtr[eZ] = sp.z() / m_cfg.zScale;
-
-    if (clusters) {
-      const auto& cluster = clusters->at(sl.index());
-      const auto& chnls = cluster.channels;
-
-      featurePtr[eCellCount] = cluster.channels.size() / m_cfg.cellCountScale;
-      featurePtr[eCellSum] =
-          std::accumulate(chnls.begin(), chnls.end(), 0.0,
-                          [](double s, const Cluster::Cell& c) {
-                            return s + c.activation;
-                          }) /
-          m_cfg.cellSumScale;
-      featurePtr[eClusterX] = cluster.sizeLoc0 / m_cfg.clusterXScale;
-      featurePtr[eClusterY] = cluster.sizeLoc1 / m_cfg.clusterYScale;
-
-      sumCells += featurePtr[eCellCount];
-      sumActivation += featurePtr[eCellSum];
+      f[ift] /= m_cfg.featureScales[ift];
     }
   }
 
-  ACTS_DEBUG("Avg cell count: " << sumCells / spacepoints.size());
-  ACTS_DEBUG("Avg activation: " << sumActivation / sumCells);
-
   // Run the pipeline
   const auto trackCandidates = [&]() {
     std::lock_guard<std::mutex> lock(m_mutex);

diff --git a/Examples/Framework/include/ActsExamples/EventData/Cluster.hpp b/Examples/Framework/include/ActsExamples/EventData/Cluster.hpp
@@ -10,6 +10,8 @@
 
 #include "ActsFatras/Digitization/Segmentizer.hpp"
 
+#include <numeric>
+#include <optional>
 #include <vector>
 
 namespace ActsExamples {
@@ -20,6 +22,15 @@ struct Cluster {
   std::size_t sizeLoc0 = 0;
   std::size_t sizeLoc1 = 0;
   std::vector<Cell> channels;
+
+  // TODO make this be provided by Fatras?
+  Acts::Vector3 globalPosition = Acts::Vector3::Zero();
+
+  double sumActivations() const {
+    return std::accumulate(
+        channels.begin(), channels.end(), 0.0,
+        [](double s, const Cluster::Cell& c) { return s + c.activation; });
+  }
 };
 
 /// Clusters have a one-to-one relation with measurements

diff --git a/Examples/Python/src/ExaTrkXTrackFinding.cpp b/Examples/Python/src/ExaTrkXTrackFinding.cpp
@@ -167,14 +167,36 @@ void addExaTrkXTrackFinding(Context &ctx) {
   }
 #endif
 
+  py::enum_<TrackFindingAlgorithmExaTrkX::NodeFeature>(mex, "NodeFeature")
+      .value("R", TrackFindingAlgorithmExaTrkX::NodeFeature::eR)
+      .value("Phi", TrackFindingAlgorithmExaTrkX::NodeFeature::ePhi)
+      .value("Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eZ)
+      .value("X", TrackFindingAlgorithmExaTrkX::NodeFeature::eX)
+      .value("Y", TrackFindingAlgorithmExaTrkX::NodeFeature::eY)
+      .value("Eta", TrackFindingAlgorithmExaTrkX::NodeFeature::eEta)
+      .value("ClusterX", TrackFindingAlgorithmExaTrkX::NodeFeature::eClusterX)
+      .value("ClusterY", TrackFindingAlgorithmExaTrkX::NodeFeature::eClusterY)
+      .value("CellCount", TrackFindingAlgorithmExaTrkX::NodeFeature::eCellCount)
+      .value("CellSum", TrackFindingAlgorithmExaTrkX::NodeFeature::eCellSum)
+      .value("Cluster1R", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1R)
+      .value("Cluster2R", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2R)
+      .value("Cluster1Phi",
+             TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Phi)
+      .value("Cluster2Phi",
+             TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Phi)
+      .value("Cluster1Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Z)
+      .value("Cluster2Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Z)
+      .value("Cluster1Eta",
+             TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Eta)
+      .value("Cluster2Eta",
+             TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Eta);
+
   ACTS_PYTHON_DECLARE_ALGORITHM(
       ActsExamples::TrackFindingAlgorithmExaTrkX, mex,
       "TrackFindingAlgorithmExaTrkX", inputSpacePoints, inputSimHits,
       inputParticles, inputClusters, inputMeasurementSimhitsMap,
       outputProtoTracks, outputGraph, graphConstructor, edgeClassifiers,
-      trackBuilder, rScale, phiScale, zScale, cellCountScale, cellSumScale,
-      clusterXScale, clusterYScale, filterShortTracks, targetMinHits,
-      targetMinPT);
+      trackBuilder, nodeFeatures, featureScales, filterShortTracks);
 
   {
     auto cls =