Skip to content

Commit

Permalink
refactor: Cluster features for GNN pipeline (#3356)
Browse files Browse the repository at this point in the history
This refactors the way (cluster)features for the GNN pipeline are computed. Also extends the `Examples::Cluster` to support a global position which is needed by athena data (Updates to that will be merged later).
  • Loading branch information
benjaminhuth authored Jul 11, 2024
1 parent fa78ed3 commit b5069e1
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,31 @@ namespace ActsExamples {

class TrackFindingAlgorithmExaTrkX final : public IAlgorithm {
public:
enum class NodeFeature {
eR,
ePhi,
eX,
eY,
eZ,
eEta,
eCellCount,
eCellSum,
eClusterX,
eClusterY,
eCluster1R,
eCluster2R,
eCluster1Phi,
eCluster2Phi,
eCluster1Z,
eCluster2Z,
eCluster1Eta,
eCluster2Eta,
};

struct Config {
/// Input spacepoints collection.
std::string inputSpacePoints;

/// Input cluster information (Optional). If given, the following features
/// are added:
/// * cell count
/// * sum cell activations
/// * cluster size in local x
/// * cluster size in local y
/// Input cluster information (Optional).
std::string inputClusters;

/// Input simhits (Optional).
Expand All @@ -62,14 +77,12 @@ class TrackFindingAlgorithmExaTrkX final : public IAlgorithm {

std::shared_ptr<Acts::TrackBuildingBase> trackBuilder;

/// Scaling of the input features
float rScale = 1.f;
float phiScale = 1.f;
float zScale = 1.f;
float cellCountScale = 1.f;
float cellSumScale = 1.f;
float clusterXScale = 1.f;
float clusterYScale = 1.f;
/// Node features
std::vector<NodeFeature> nodeFeatures = {NodeFeature::eR, NodeFeature::ePhi,
NodeFeature::eZ};

/// Feature scales
std::vector<float> featureScales = {1.f, 1.f, 1.f};

/// Remove track candidates with 2 or less hits
bool filterShortTracks = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ class ExamplesEdmHook : public Acts::ExaTrkXHook {
}
};

// TODO do we have these function in the repo somewhere?
float theta(float r, float z) {
return std::atan2(r, z);
}
float eta(float r, float z) {
return -std::log(std::tan(0.5 * theta(r, z)));
}

} // namespace

ActsExamples::TrackFindingAlgorithmExaTrkX::TrackFindingAlgorithmExaTrkX(
Expand Down Expand Up @@ -166,18 +174,29 @@ ActsExamples::TrackFindingAlgorithmExaTrkX::TrackFindingAlgorithmExaTrkX(
m_timing.classifierTimes.resize(
m_cfg.edgeClassifiers.size(),
decltype(m_timing.classifierTimes)::value_type{0.f});

// Check if we want cluster features but do not have them
const static std::array clFeatures = {
NodeFeature::eClusterX, NodeFeature::eClusterY, NodeFeature::eCellCount,
NodeFeature::eCellSum, NodeFeature::eCluster1R, NodeFeature::eCluster2R};

auto wantClFeatures = std::any_of(
m_cfg.nodeFeatures.begin(), m_cfg.nodeFeatures.end(), [&](const auto& f) {
return std::find(clFeatures.begin(), clFeatures.end(), f) !=
clFeatures.end();
});

if (wantClFeatures && !m_inputClusters.isInitialized()) {
throw std::invalid_argument("Cluster features requested, but not provided");
}

if (m_cfg.nodeFeatures.size() != m_cfg.featureScales.size()) {
throw std::invalid_argument(
"Number of features mismatches number of scale parameters.");
}
}

/// Allow access to features with nice names
enum feat : std::size_t {
eR = 0,
ePhi,
eZ,
eCellCount,
eCellSum,
eClusterX,
eClusterY
};

ActsExamples::ProcessCode ActsExamples::TrackFindingAlgorithmExaTrkX::execute(
const ActsExamples::AlgorithmContext& ctx) const {
Expand All @@ -200,54 +219,70 @@ ActsExamples::ProcessCode ActsExamples::TrackFindingAlgorithmExaTrkX::execute(
// Convert Input data to a list of size [num_measurements x
// measurement_features]
const std::size_t numSpacepoints = spacepoints.size();
const std::size_t numFeatures = clusters ? 7 : 3;
ACTS_INFO("Received " << numSpacepoints << " spacepoints");
const std::size_t numFeatures = m_cfg.nodeFeatures.size();
ACTS_DEBUG("Received " << numSpacepoints << " spacepoints");
ACTS_DEBUG("Construct " << numFeatures << " node features");

std::vector<float> features(numSpacepoints * numFeatures);
std::vector<int> spacepointIDs;

spacepointIDs.reserve(spacepoints.size());

double sumCells = 0.0;
double sumActivation = 0.0;
for (auto isp = 0ul; isp < numSpacepoints; ++isp) {
const auto& sp = spacepoints[isp];

for (auto i = 0ul; i < numSpacepoints; ++i) {
const auto& sp = spacepoints[i];
// For now just take the first index since does require one single index
// per spacepoint
// TODO does it work for the module map construction to use only the first
// sp?
const auto& sl1 = sp.sourceLinks()[0].template get<IndexSourceLink>();

// TODO this makes it a bit useless, refactor so we do not need to pass this
// to the pipeline
spacepointIDs.push_back(isp);

// This should be fine, because check in constructor
Cluster* cl1 = clusters ? &clusters->at(sl1.index()) : nullptr;
Cluster* cl2 = cl1;

if (sp.sourceLinks().size() == 2) {
const auto& sl2 = sp.sourceLinks()[1].template get<IndexSourceLink>();
cl2 = clusters ? &clusters->at(sl2.index()) : nullptr;
}

// I would prefer to use a std::span or boost::span here once available
float* featurePtr = features.data() + i * numFeatures;
float* f = features.data() + isp * numFeatures;

using NF = NodeFeature;

for (auto ift = 0ul; ift < numFeatures; ++ift) {
// clang-format off
switch(m_cfg.nodeFeatures[ift]) {
break; case NF::eR: f[ift] = std::hypot(sp.x(), sp.y());
break; case NF::ePhi: f[ift] = std::atan2(sp.y(), sp.x());
break; case NF::eZ: f[ift] = sp.z();
break; case NF::eX: f[ift] = sp.x();
break; case NF::eY: f[ift] = sp.y();
break; case NF::eEta: f[ift] = eta(std::hypot(sp.x(), sp.y()), sp.z());
break; case NF::eClusterX: f[ift] = cl1->sizeLoc0;
break; case NF::eClusterY: f[ift] = cl1->sizeLoc1;
break; case NF::eCellSum: f[ift] = cl1->sumActivations();
break; case NF::eCellCount: f[ift] = cl1->channels.size();
break; case NF::eCluster1R: f[ift] = std::hypot(cl1->globalPosition[Acts::ePos0], cl1->globalPosition[Acts::ePos1]);
break; case NF::eCluster2R: f[ift] = std::hypot(cl2->globalPosition[Acts::ePos0], cl2->globalPosition[Acts::ePos1]);
break; case NF::eCluster1Phi: f[ift] = std::atan2(cl1->globalPosition[Acts::ePos1], cl1->globalPosition[Acts::ePos0]);
break; case NF::eCluster2Phi: f[ift] = std::atan2(cl2->globalPosition[Acts::ePos1], cl2->globalPosition[Acts::ePos0]);
break; case NF::eCluster1Z: f[ift] = cl1->globalPosition[Acts::ePos2];
break; case NF::eCluster2Z: f[ift] = cl2->globalPosition[Acts::ePos2];
break; case NF::eCluster1Eta: f[ift] = eta(std::hypot(cl1->globalPosition[Acts::ePos0], cl1->globalPosition[Acts::ePos1]), cl1->globalPosition[Acts::ePos2]);
break; case NF::eCluster2Eta: f[ift] = eta(std::hypot(cl2->globalPosition[Acts::ePos0], cl2->globalPosition[Acts::ePos1]), cl2->globalPosition[Acts::ePos2]);
}
// clang-format on

// For now just take the first index since does require one single index
// per spacepoint
const auto& sl = sp.sourceLinks()[0].template get<IndexSourceLink>();
spacepointIDs.push_back(sl.index());

featurePtr[eR] = std::hypot(sp.x(), sp.y()) / m_cfg.rScale;
featurePtr[ePhi] = std::atan2(sp.y(), sp.x()) / m_cfg.phiScale;
featurePtr[eZ] = sp.z() / m_cfg.zScale;

if (clusters) {
const auto& cluster = clusters->at(sl.index());
const auto& chnls = cluster.channels;

featurePtr[eCellCount] = cluster.channels.size() / m_cfg.cellCountScale;
featurePtr[eCellSum] =
std::accumulate(chnls.begin(), chnls.end(), 0.0,
[](double s, const Cluster::Cell& c) {
return s + c.activation;
}) /
m_cfg.cellSumScale;
featurePtr[eClusterX] = cluster.sizeLoc0 / m_cfg.clusterXScale;
featurePtr[eClusterY] = cluster.sizeLoc1 / m_cfg.clusterYScale;

sumCells += featurePtr[eCellCount];
sumActivation += featurePtr[eCellSum];
f[ift] /= m_cfg.featureScales[ift];
}
}

ACTS_DEBUG("Avg cell count: " << sumCells / spacepoints.size());
ACTS_DEBUG("Avg activation: " << sumActivation / sumCells);

// Run the pipeline
const auto trackCandidates = [&]() {
std::lock_guard<std::mutex> lock(m_mutex);
Expand Down
11 changes: 11 additions & 0 deletions Examples/Framework/include/ActsExamples/EventData/Cluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include "ActsFatras/Digitization/Segmentizer.hpp"

#include <numeric>
#include <optional>
#include <vector>

namespace ActsExamples {
Expand All @@ -20,6 +22,15 @@ struct Cluster {
std::size_t sizeLoc0 = 0;
std::size_t sizeLoc1 = 0;
std::vector<Cell> channels;

// TODO make this be provided by Fatras?
Acts::Vector3 globalPosition = Acts::Vector3::Zero();

double sumActivations() const {
return std::accumulate(
channels.begin(), channels.end(), 0.0,
[](double s, const Cluster::Cell& c) { return s + c.activation; });
}
};

/// Clusters have a one-to-one relation with measurements
Expand Down
28 changes: 25 additions & 3 deletions Examples/Python/src/ExaTrkXTrackFinding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,36 @@ void addExaTrkXTrackFinding(Context &ctx) {
}
#endif

py::enum_<TrackFindingAlgorithmExaTrkX::NodeFeature>(mex, "NodeFeature")
.value("R", TrackFindingAlgorithmExaTrkX::NodeFeature::eR)
.value("Phi", TrackFindingAlgorithmExaTrkX::NodeFeature::ePhi)
.value("Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eZ)
.value("X", TrackFindingAlgorithmExaTrkX::NodeFeature::eX)
.value("Y", TrackFindingAlgorithmExaTrkX::NodeFeature::eY)
.value("Eta", TrackFindingAlgorithmExaTrkX::NodeFeature::eEta)
.value("ClusterX", TrackFindingAlgorithmExaTrkX::NodeFeature::eClusterX)
.value("ClusterY", TrackFindingAlgorithmExaTrkX::NodeFeature::eClusterY)
.value("CellCount", TrackFindingAlgorithmExaTrkX::NodeFeature::eCellCount)
.value("CellSum", TrackFindingAlgorithmExaTrkX::NodeFeature::eCellSum)
.value("Cluster1R", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1R)
.value("Cluster2R", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2R)
.value("Cluster1Phi",
TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Phi)
.value("Cluster2Phi",
TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Phi)
.value("Cluster1Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Z)
.value("Cluster2Z", TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Z)
.value("Cluster1Eta",
TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster1Eta)
.value("Cluster2Eta",
TrackFindingAlgorithmExaTrkX::NodeFeature::eCluster2Eta);

ACTS_PYTHON_DECLARE_ALGORITHM(
ActsExamples::TrackFindingAlgorithmExaTrkX, mex,
"TrackFindingAlgorithmExaTrkX", inputSpacePoints, inputSimHits,
inputParticles, inputClusters, inputMeasurementSimhitsMap,
outputProtoTracks, outputGraph, graphConstructor, edgeClassifiers,
trackBuilder, rScale, phiScale, zScale, cellCountScale, cellSumScale,
clusterXScale, clusterYScale, filterShortTracks, targetMinHits,
targetMinPT);
trackBuilder, nodeFeatures, featureScales, filterShortTracks);

{
auto cls =
Expand Down

0 comments on commit b5069e1

Please sign in to comment.