Minor plotting improvements
Caio Cesar Daumann committed Jan 3, 2024
1 parent 9336d35 commit d14c9ac
Showing 4 changed files with 50 additions and 27 deletions.
22 changes: 17 additions & 5 deletions data_reading/read_data.py
@@ -67,7 +67,7 @@ def perform_zee_kinematics_reweighting(data_array, data_weights, mc_array, mc_we
phi_min, phi_max = -3.15,3.15

# now the number of bins in each distribution
- pt_bins,rho_bins,eta_bins,phi_bins = 10, 30, 10, 2 #was 20, 30, 10, 4
+ pt_bins,rho_bins,eta_bins,phi_bins = 10, 32, 12, 2 #was 20, 30, 10, 4

# Now we create a 4D histogram of these kinematic variables
mc_histo, edges = np.histogramdd( sample = (mc_array[:,0] , mc_array[:,3], mc_array[:,1], mc_array[:,2]) , bins = (pt_bins,rho_bins,eta_bins,phi_bins), range = [ [pt_min,pt_max], [ rho_min, rho_max ],[eta_min,eta_max], [phi_min,phi_max] ], weights = mc_weights )
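For orientation: the reweighting tuned in this hunk takes the bin-by-bin data/MC ratio of that 4D histogram and applies it as a per-event MC weight. A minimal sketch of the idea with the same np.histogramdd binning; the helper name and the digitize-based lookup are illustrative, not the repository's exact code:

```python
import numpy as np

def histogram_ratio_weights(data, mc, mc_weights, bins, ranges):
    # 4D histograms of (pt, rho, eta, phi) for data and for weighted MC
    data_h, edges = np.histogramdd(data, bins=bins, range=ranges)
    mc_h, _ = np.histogramdd(mc, bins=bins, range=ranges, weights=mc_weights)
    # bin-by-bin data/MC ratio; bins with no MC events get weight 0
    ratio = np.divide(data_h, mc_h, out=np.zeros_like(data_h), where=mc_h > 0)
    # locate each MC event's bin and pull out its correction factor
    idx = tuple(
        np.clip(np.digitize(mc[:, d], edges[d]) - 1, 0, data_h.shape[d] - 1)
        for d in range(mc.shape[1])
    )
    return mc_weights * ratio[idx]
```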
@@ -225,18 +225,23 @@ def read_zee_data():
path_to_data = "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/"

# Let's now read the data and simulation as pandas dataframes
files_DY_mc = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/DY_postEE_v13/nominal/*.parquet")
files_DY_mc = files_DY_mc[:300]
simulation = [pd.read_parquet(f) for f in files_DY_mc]
drell_yan_df = pd.concat(simulation,ignore_index=True)

# now the data files for the epochs E, F and G
- files_DY_data_F = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataG_v13/nominal/*.parquet")
+ files_DY_data_E = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataE_v13/nominal/*.parquet")
+ files_DY_data_E = files_DY_data_E[:300]

+ files_DY_data_F = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataF_v13/nominal/*.parquet")
+ files_DY_data_F = files_DY_data_F[:300]

+ files_DY_data_G = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataG_v13/nominal/*.parquet")
+ files_DY_data_G = files_DY_data_G[:300]

+ # I think there is a problem with the samples above, let's test with the new samples!

else:

path_to_data = "/net/scratch_cms3a/daumann/nanoAODv12_Production/"
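The capped parquet reading above repeats the same three-line pattern per sample; a hedged sketch of a reusable version (the function name and the sorting are my own choices):

```python
import glob
import pandas as pd

def load_parquet_shards(pattern, max_files=300):
    # cap the number of shards so memory stays bounded while testing
    files = sorted(glob.glob(pattern))[:max_files]
    return pd.concat((pd.read_parquet(f) for f in files), ignore_index=True)
```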
@@ -255,7 +260,7 @@ def read_zee_data():
files_DY_data_G = files_DY_data_G[:30]

# merging the dataframes
- files_DY_data = [files_DY_data_G,files_DY_data_F]
+ files_DY_data = [files_DY_data_E,files_DY_data_G,files_DY_data_F]

data = [pd.read_parquet(f) for f in files_DY_data]
data_df = pd.concat(data,ignore_index=True)
@@ -269,11 +274,18 @@ def read_zee_data():
# The selection will be done in the perform_zee_selection() function
mask_data, mask_mc = perform_zee_selection( data_df, drell_yan_df )

+ # Switch to decide if reweighting is used or not
+ perform_reweighting = True

# now, due to differences in kinematics, a reweighting in the four kinematic variables [pt, eta, phi and rho] will be performed
mc_weights = drell_yan_df["weight"]
mc_weights_before = mc_weights
data_weights = np.ones( len( data_df["fixedGridRhoAll"] ) )
- data_weights, mc_weights = perform_zee_kinematics_reweighting(data_df[conditions_list][mask_data], data_weights[mask_data], drell_yan_df[conditions_list][mask_mc], mc_weights[mask_mc])

+ if( perform_reweighting ):
+     data_weights, mc_weights = perform_zee_kinematics_reweighting(data_df[conditions_list][mask_data], data_weights[mask_data], drell_yan_df[conditions_list][mask_mc], mc_weights[mask_mc])
+ else:
+     data_weights, mc_weights = np.array(data_weights[mask_data])/np.sum( np.array(data_weights[mask_data]) ), np.array(mc_weights[mask_mc])/np.sum(np.array(mc_weights[mask_mc]))

# now let's call a plotting function to plot the distributions that were read, for validation purposes
path_to_plots = "/net/scratch_cms3a/daumann/PhD/EarlyHgg/simulation_to_data_corrections/plot/validation_plots/"
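When the new switch is off, the hunk above only normalises each masked weight vector to unit sum, so data and MC can still be compared as shapes. A compact sketch of that fallback (the helper name is assumed):

```python
import numpy as np

def unit_normalise(weights, mask):
    # no kinematic reweighting: scale the selected weights to sum to one
    w = np.asarray(weights, dtype=float)[np.asarray(mask)]
    return w / w.sum()
```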
2 changes: 2 additions & 0 deletions main.py
@@ -51,6 +51,8 @@ def main():
corrections.setup_flow()
corrections.train_the_flow()

+ #exit()

# Now, we call the class that handles the transformations, training and validation of the corrections
#corrections = training_utils.Simulation_correction()
#corrections.setup_flow()
41 changes: 25 additions & 16 deletions normalizing_flows/training_utils.py
@@ -36,7 +36,7 @@ def __init__(self, configuration, n_transforms, n_splines_bins, aux_nodes, aux_l

# Checking if cuda is available
print('Checking cuda availability: ', torch.cuda.is_available())
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ device = torch.device('cpu') # force CPU even when CUDA is available
self.device = device

#reading important tensors
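If the CPU/GPU choice should stay switchable rather than hard-coded, one possible refactor (a suggestion, not code from this commit):

```python
import torch

def pick_device(force_cpu: bool = True) -> torch.device:
    # explicit toggle instead of a conditional whose branches are identical
    if force_cpu or not torch.cuda.is_available():
        return torch.device('cpu')
    return torch.device('cuda')
```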
@@ -158,15 +158,15 @@ def train_the_flow(self):
# load the trained model and perform the final plots!
def evaluate_results(self):

- # Results are evaluated in the test dataset, never seen by the neural network!
- # A mask is applied so only MC events are chosen from the data space
- MaskOnlyvalidationMC = self.validation_conditions[:,self.validation_conditions.size()[1] -1 ] == 0
- self.validation_conditions = self.validation_conditions[MaskOnlyvalidationMC]
- self.validation_inputs = self.validation_inputs[MaskOnlyvalidationMC]
- self.validation_weights = self.validation_weights[MaskOnlyvalidationMC]

with torch.no_grad():

+ # Results are evaluated in the test dataset, never seen by the neural network!
+ # A mask is applied so only MC events are chosen from the data space
+ MaskOnlyvalidationMC = self.validation_conditions[:,self.validation_conditions.size()[1] -1 ] == 0
+ self.validation_conditions = self.validation_conditions[MaskOnlyvalidationMC]
+ self.validation_inputs = self.validation_inputs[MaskOnlyvalidationMC]
+ self.validation_weights = self.validation_weights[MaskOnlyvalidationMC]

trans = self.flow(self.mc_test_conditions).transform
sim_latent = trans( self.mc_test_inputs )
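These two transform calls implement the usual latent-space morphing with a conditional flow: encode MC under MC conditions, then decode under data-like conditions. The lines elided here presumably flip the is-data condition before trans2 is built; a sketch of the overall pattern, assuming a zuko-style flow(conditions).transform API with the flag in the last condition column:

```python
import torch

def morph_mc_to_data(flow, mc_inputs, mc_conditions):
    with torch.no_grad():
        # encode MC events into the latent space under MC conditions
        latent = flow(mc_conditions).transform(mc_inputs)
        # assumption: the last condition column is the is-data flag
        data_conditions = mc_conditions.clone()
        data_conditions[:, -1] = 1.0
        # decode under data-like conditions -> corrected MC
        return flow(data_conditions).transform.inv(latent)
```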

@@ -175,6 +175,13 @@ def evaluate_results(self):
trans2 = self.flow(self.mc_test_conditions).transform
self.samples = trans2.inv( sim_latent)

+ # let's save the means and stds used for the transformations
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'input_means.npy', self.input_mean_for_std)
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'input_std.npy' , self.input_std_for_std )

+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'conditions_means.npy', self.condition_mean_for_std)
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'conditions_std.npy' , self.condition_std_for_std )

# now, let's invert the transformations
self.invert_transformation()
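Saving the standardisation statistics lets the exact training-time scaling be reapplied when the correction is used elsewhere; a small sketch of the reload side (the paths mirror the np.save calls above, the helper itself is illustrative):

```python
import os
import numpy as np

def load_input_standardisation(configuration):
    base = os.getcwd() + '/results/' + configuration + '/'
    mean = np.load(base + 'input_means.npy')
    std = np.load(base + 'input_std.npy')
    # apply as x_std = (x - mean) / std, matching the training transform
    return mean, std
```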

@@ -218,11 +225,11 @@ def perform_transformation(self):

# since hoe has very low values, the shift value (the value below which triangular events are sampled) must be different here
if( index == 6 ):
- self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], b = 0.001) )
- self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index] , b= 0.001) )
+ self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], self.device , b = 0.001) )
+ self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index], self.device , b= 0.001) )
else:
- self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index]) )
- self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index]) )
+ self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], self.device ) )
+ self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index], self.device ) )

# now really applying the transformations
counter = 0
@@ -267,7 +274,7 @@ def perform_transformation(self):
print( self.condition_std_for_std )
exit()
"""

# transforming the training tensors
self.training_inputs = ( self.training_inputs - self.input_mean_for_std )/self.input_std_for_std
self.training_conditions[:,:-1] = ( self.training_conditions[:,:-1] - self.condition_mean_for_std )/self.condition_std_for_std
@@ -366,8 +373,10 @@ def read_saved_tensor(self):

# this is the class responsible for the isolation variables transformation
class Make_iso_continuous:
- def __init__(self, tensor, b = False):
+ def __init__(self, tensor, device, b = False):

+ self.device = device

#tensor = tensor.cpu()
self.iso_bigger_zero = tensor > 0
self.iso_equal_zero = tensor == 0
@@ -393,7 +402,7 @@ def shift_and_sample(self, tensor):
# now a log transform is applied on top of the smoothing to stretch the events in the zero-peak triangle and "kill" the iso tails
tensor = torch.log( 1e-3 + tensor )

- return tensor.to('cuda')
+ return tensor.to(self.device)

#inverse operation of the above shift_and_sample transform
def inverse_shift_and_sample( self,tensor , processed = False):
@@ -414,4 +423,4 @@ def inverse_shift_and_sample( self,tensor , processed = False):
assert (abs(torch.sum( self.before_transform - tensor )) < tensor.size()[0]*1e-6 )
#added the tensor.size()[0]*1e-6 term due to possible numerical fluctuations!

- return tensor.to('cuda')
+ return tensor.to(self.device)
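For context, Make_iso_continuous smooths isolation-like variables whose distributions have a discrete spike at zero: zeros are spread over a small wedge below the shift value b, genuine values are shifted up by b, and a log compresses the tail; the inverse undoes the log and collapses the wedge back to zero. A self-contained sketch of that idea (the triangular shape and the b default are assumptions; the class's exact bookkeeping differs):

```python
import torch

def smooth_iso(iso, b=0.05):
    # spread the spike at zero over (0, b) with a triangular density
    # peaked at zero, and shift genuine values above b
    out = iso.clone().float()
    zero = iso == 0
    u = torch.rand(int(zero.sum()))
    out[zero] = b * (1.0 - torch.sqrt(u))  # triangular samples in (0, b)
    out[~zero] = out[~zero] + b            # keep real values separated
    # the log stretches the wedge and tames the isolation tail
    return torch.log(1e-3 + out)

def unsmooth_iso(smoothed, b=0.05):
    # approximate inverse: undo the log, zero the wedge, unshift the rest
    x = torch.exp(smoothed) - 1e-3
    return torch.where(x < b, torch.zeros_like(x), x - b)
```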
12 changes: 6 additions & 6 deletions plot/plot_utils.py
@@ -225,13 +225,13 @@ def plot_distributions_for_tensors( data_tensor, mc_tensor, flow_samples, mc_wei
std = np.std( np.array(data_tensor[:,i]) )

if( 'Iso' in str(var_list[i]) or 'DR' in str(var_list[i]) or 'esE' in str(var_list[i]) or 'hoe' in str(var_list[i]) or 'energy' in str(var_list[i]) ):
- data_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
- mc_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
- mc_rw_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
+ data_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
+ mc_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
+ mc_rw_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
else:
- data_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
- mc_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
- mc_rw_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
+ data_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))
+ mc_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))
+ mc_rw_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))

if( 'DR04' in str(var_list[i]) ):
data_hist = hist.Hist(hist.axis.Regular(50, 0.0 , 5.0))
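The binning bump above (45 to 50 bins) uses scikit-hep's hist; a minimal sketch of how the data-driven window and the weighted fill go together (the helper name is mine):

```python
import hist
import numpy as np

def make_validation_hist(values, weights, positive_only=False, nbins=50):
    mean, std = np.mean(values), np.std(values)
    # isolation-like variables start at zero; others get a symmetric window
    if positive_only:
        lo, hi = 0.0, mean + 2.0 * std
    else:
        lo, hi = mean - 2.5 * std, mean + 2.5 * std
    h = hist.Hist(hist.axis.Regular(nbins, lo, hi))
    h.fill(values, weight=weights)
    return h
```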
