megaClustering.py
# investigate shower development based on RecHits and SimClusters
# import ROOT
# import os
import optparse
# from array import array
# from HGCalImagingAlgo import recHitAboveThreshold
from NtupleDataFormat import HGCalNtuple
# from GeoUtils import GeoUtil
# import math
import hgcalHelpers
# import hgcalHistHelpers
import numpy as np
import pandas as pd
from itertools import repeat
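# per-layer energy weights: judging from the list lengths below, 28 EE layers (1.02),
# 12 FH layers (0.86) and 12 BH layers (1.12), for maxlayer = 52 layers in total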
maxlayer = 52
energyWeights = list(repeat(1.02, 28)) + list(repeat(0.86, 12)) + list(repeat(1.12, 12))
def getConeRadius(frontRadius, backRadius, z, maxval=9999.):
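    """Cone radius grows linearly with |z|: frontRadius at |z| = 320.7 cm (front of EE),
    frontRadius + backRadius at |z| = 407.8 cm (back of FH), capped at maxval."""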
    depthTerm = backRadius * (abs(z)-320.7)/(407.8-320.7)
    val = frontRadius + depthTerm
    if val > maxval:
        return maxval
    return val
def pileupSubtraction(matchedMultiCluster, selectedLayerClusters, recHits, layer, energyRadius, frontRadius, backRadius):
"""For now, the code is the same as in getMegaClusters, but the phi coordinate is changed by pi."""
    pTSum = 0
    energySum = 0
    # take first layer cluster z value
    layer_z = selectedLayerClusters.head(1).z.item()
    # get multi cluster x and y coordinates
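    # shift the multicluster phi by pi and wrap it back into [-pi, pi]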
    matchedMultiCluster_phi = float(matchedMultiCluster.phi) - np.pi
    if (matchedMultiCluster_phi < -np.pi):
        matchedMultiCluster_phi += 2*np.pi
    multiClusPosDF = hgcalHelpers.convertToXY(matchedMultiCluster.eta, matchedMultiCluster_phi, layer_z)
    # calculate radius based on current layer's z position
    coneRadius = getConeRadius(frontRadius, backRadius, layer_z)
    # mind that we need only the first index since there is only one multiCluster
    layerClusterIndices = hgcalHelpers.getIndicesWithinRadius(multiClusPosDF[['x', 'y']], selectedLayerClusters[['x', 'y']], coneRadius)
    # now we need to recalculate the layer cluster energies using associated RecHits
    for layerClusterIndex in layerClusterIndices[0]:
        associatedRecHits = recHits.iloc[selectedLayerClusters.iloc[layerClusterIndex].rechits]
        # find maximum energy RecHit
        maxEnergyRecHitIndex = associatedRecHits['energy'].argmax()
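        # note: this relies on the older pandas behaviour where Series.argmax returns
        # the index label (equivalent to idxmax), which is what the .loc lookup below expects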
        # considering only associated RecHits within a radius of energyRadius (6 cm)
        matchedRecHitIndices = hgcalHelpers.getIndicesWithinRadius(associatedRecHits.loc[[maxEnergyRecHitIndex]][['x', 'y']], associatedRecHits[['x', 'y']], energyRadius)[maxEnergyRecHitIndex]
        # sum up energies and pT
        selectedRecHits = associatedRecHits.iloc[matchedRecHitIndices]
        # correct energy by subdetector weights
        energySum += selectedRecHits[["energy"]].sum()[0]*energyWeights[layer-1]*1.38
        pTSum += selectedRecHits[["pt"]].sum()[0]*energyWeights[layer-1]*1.38
    return (energySum, pTSum)
def getMegaClusters(genParticles, multiClusters, layerClusters, recHits, gun_type, GEN_engpt, pidSelected, energyRadius=6, frontRadius=3, backRadius=8, doPileupSubtraction=True):
"""
get the actual mega clusters.
frontRadius: cone at front of EE
backRadius: cone at back of FH (frontRadius to be added to it)
returns a dataframe containing 4-vectors
"""
    # use genParticles with generated pT/energy that reach EE before converting
    selectedGen = genParticles[(abs(genParticles.pid) == pidSelected) & (genParticles.reachedEE > 0)]
    if gun_type == "pt":
        selectedGen = selectedGen[(selectedGen.pt >= GEN_engpt*.999)]
    else:
        selectedGen = selectedGen[(selectedGen.energy >= GEN_engpt*.999)]
    # print selectedGen
    if multiClusters.shape[0] == 0:
        return pd.DataFrame(columns=['pt', 'eta', 'phi', 'energy'])
    # for the mega cluster axis, take the highest-energy multicluster
    # within dR = 0.1 (dR = 0.2 for guns with pT/energy <= 7.5 GeV)
    bestMultiClusterIndices = None
    if GEN_engpt <= 7.5:
        bestMultiClusterIndices = hgcalHelpers.getHighestEnergyObjectIndex(selectedGen[['eta', 'phi']], multiClusters[['eta', 'phi']], multiClusters['energy'], 0.2)
    else:
        bestMultiClusterIndices = hgcalHelpers.getHighestEnergyObjectIndex(selectedGen[['eta', 'phi']], multiClusters[['eta', 'phi']], multiClusters['energy'], 0.1)
    # print bestMultiClusterIndices
    megaClusters = []
    for idx, genPart in selectedGen.iterrows():
        if idx not in bestMultiClusterIndices:
            # this is the case if there's no match between gen and multi clusters
            continue
        matchedMultiCluster = multiClusters.iloc[[bestMultiClusterIndices[idx]]]
        energySum = 0
        pTSum = 0
        # now find layer clusters within the multicluster cone
        # maybe it's good to do this per layer to save some computing time
        for layer in range(1, maxlayer+1):
            # match only in same detector side
            selectedLayerClusters = layerClusters[(layerClusters.layer == layer) & (layerClusters.eta*genPart.eta > 0)]
            if selectedLayerClusters.shape[0] == 0:
                # continue if no layer clusters selected
                continue
            # take first layer cluster z value
            layer_z = selectedLayerClusters.head(1).z.item()
            # get multi cluster x and y coordinates
            multiClusPosDF = hgcalHelpers.convertToXY(matchedMultiCluster.eta, matchedMultiCluster.phi, layer_z)
            # calculate radius based on current layer's z position
            coneRadius = getConeRadius(frontRadius, backRadius, layer_z)
            # mind that we need only the first index since there is only one multiCluster
            layerClusterIndices = hgcalHelpers.getIndicesWithinRadius(multiClusPosDF[['x', 'y']], selectedLayerClusters[['x', 'y']], coneRadius)
            # now we need to recalculate the layer cluster energies using associated RecHits
            for layerClusterIndex in layerClusterIndices[0]:
                associatedRecHits = recHits.iloc[selectedLayerClusters.iloc[layerClusterIndex].rechits]
                # find maximum energy RecHit
                maxEnergyRecHitIndex = associatedRecHits['energy'].argmax()
                # considering only associated RecHits within a radius of energyRadius (6 cm)
                matchedRecHitIndices = hgcalHelpers.getIndicesWithinRadius(associatedRecHits.loc[[maxEnergyRecHitIndex]][['x', 'y']], associatedRecHits[['x', 'y']], energyRadius)[maxEnergyRecHitIndex]
                # sum up energies and pT
                selectedRecHits = associatedRecHits.iloc[matchedRecHitIndices]
                # correct energy by subdetector weights
                energySum += selectedRecHits[["energy"]].sum()[0]*energyWeights[layer-1]*1.38
                pTSum += selectedRecHits[["pt"]].sum()[0]*energyWeights[layer-1]*1.38
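            # estimate the pileup contribution in a cone shifted by pi in phi
            # and subtract it from the running energy and pT sums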
            if (doPileupSubtraction):
                (pu_energySum, pu_pTSum) = pileupSubtraction(matchedMultiCluster, selectedLayerClusters, recHits, layer, energyRadius, frontRadius, backRadius)
                energySum -= pu_energySum
                pTSum -= pu_pTSum
        # use as coordinates eta and phi of matched multi cluster
        megaCluster = [pTSum, matchedMultiCluster['eta'].item(), matchedMultiCluster['phi'].item(), energySum]
        megaClusters.append(megaCluster)
    megaClustersDF = pd.DataFrame(megaClusters, columns=['pt', 'eta', 'phi', 'energy'])
    return megaClustersDF
def getCollections(event):
"""
get the collections to be fed to the mega clustering.
need genParticles, multiClusters, layerClusters, recHits
"""
genParticles = event.getDataFrame(prefix="genpart")
multiClusters = event.getDataFrame(prefix="multiclus")
layerClusters = event.getDataFrame(prefix="cluster2d")
recHits = event.getDataFrame(prefix="rechit")
return genParticles, multiClusters, layerClusters, recHits
def main():
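    """Parse the command-line options, then loop over the input files and print the
    mega clusters for the first events (up to entry 11) of each file."""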
    global opt, args
    usage = ('usage: %prog [options]\n' + '%prog -h for help')
    parser = optparse.OptionParser(usage)
    # input options
    # parser.add_option('', '--files', dest='fileString', type='string', default='root://eoscms.cern.ch//eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/_SinglePiPt50Eta1p6_2p8_PhaseIITDRFall17DR-noPUFEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO/NTUP/_SinglePiPt50Eta1p6_2p8_PhaseIITDRFall17DR-noPUFEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO_NTUP_1.root', help='comma-separated file list')
    parser.add_option('', '--files', dest='fileString', type='string', default='/afs/cern.ch/work/e/escott/public/HGCStudies/Ntuples/partGun_Pion_Pt25_93X_PU140.root', help='comma-separated file list')
    parser.add_option('', '--gunType', dest='gunType', type='string', default='pt', help='pt or e')
    parser.add_option('', '--pid', dest='pid', type='int', default=211, help='pdgId int')
    parser.add_option('', '--genValue', dest='genValue', type='float', default=25, help='generated pT or energy')
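    # example invocation (with a hypothetical ntuple path):
    #   python megaClustering.py --files /some/path/partGun_Pion_Pt25.root --gunType pt --pid 211 --genValue 25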
    # store options and arguments as global variables
    (opt, args) = parser.parse_args()
    print "files:", opt.fileString
    print "gunType:", opt.gunType
    print "pid:", opt.pid
    print "GEN_engpt:", opt.genValue
    # set sample/tree - for photons
    gun_type = opt.gunType
    pidSelected = opt.pid
    GEN_engpt = opt.genValue
    fileList = opt.fileString.split(",")
    for fileName in fileList:
        ntuple = HGCalNtuple(fileName)
        for event in ntuple:
            if (event.entry() > 11):
                break
            # get collections
            genParticles, multiClusters, layerClusters, recHits = getCollections(event)
            megaClusters = getMegaClusters(genParticles, multiClusters, layerClusters, recHits, gun_type, GEN_engpt, pidSelected)
            print megaClusters
if __name__ == '__main__':
    main()