Minor plotting improvements
Caio Cesar Daumann committed Jan 3, 2024
1 parent 9336d35 commit d14c9ac
Showing 4 changed files with 50 additions and 27 deletions.
22 changes: 17 additions & 5 deletions data_reading/read_data.py
@@ -67,7 +67,7 @@ def perform_zee_kinematics_reweighting(data_array, data_weights, mc_array, mc_we
phi_min, phi_max = -3.15,3.15

# now the number of bins in each distribution
- pt_bins,rho_bins,eta_bins,phi_bins = 10, 30, 10, 2 #was 20, 30, 10, 4
+ pt_bins,rho_bins,eta_bins,phi_bins = 10, 32, 12, 2 #was 20, 30, 10, 4

# Now we create a 4D histogram of these kinematic variables
mc_histo, edges = np.histogramdd( sample = (mc_array[:,0] , mc_array[:,3], mc_array[:,1], mc_array[:,2]) , bins = (pt_bins,rho_bins,eta_bins,phi_bins), range = [ [pt_min,pt_max], [ rho_min, rho_max ],[eta_min,eta_max], [phi_min,phi_max] ], weights = mc_weights )
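For orientation: the reweighting tuned in this hunk takes the bin-by-bin data/MC ratio of that 4D histogram and applies it as a per-event MC weight. A minimal sketch of the idea with the same np.histogramdd binning; the helper name and the digitize-based lookup are illustrative, not the repository's exact code:

```python
import numpy as np

def histogram_ratio_weights(data, mc, mc_weights, bins, ranges):
    # 4D histograms of (pt, rho, eta, phi) for data and for weighted MC
    data_h, edges = np.histogramdd(data, bins=bins, range=ranges)
    mc_h, _ = np.histogramdd(mc, bins=bins, range=ranges, weights=mc_weights)
    # bin-by-bin data/MC ratio; bins with no MC events get weight 0
    ratio = np.divide(data_h, mc_h, out=np.zeros_like(data_h), where=mc_h > 0)
    # locate each MC event's bin and pull out its correction factor
    idx = tuple(
        np.clip(np.digitize(mc[:, d], edges[d]) - 1, 0, data_h.shape[d] - 1)
        for d in range(mc.shape[1])
    )
    return mc_weights * ratio[idx]
```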
@@ -225,18 +225,23 @@ def read_zee_data():
path_to_data = "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/"

# Let's now read the data and simulation as pandas dataframes
files_DY_mc = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/DY_postEE_v13/nominal/*.parquet")
files_DY_mc = files_DY_mc[:300]
simulation = [pd.read_parquet(f) for f in files_DY_mc]
drell_yan_df = pd.concat(simulation,ignore_index=True)

# now the data files for the epochs E, F and G
- files_DY_data_F = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataG_v13/nominal/*.parquet")
+ files_DY_data_E = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataE_v13/nominal/*.parquet")
+ files_DY_data_E = files_DY_data_E[:300]

+ files_DY_data_F = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataF_v13/nominal/*.parquet")
+ files_DY_data_F = files_DY_data_F[:300]

+ files_DY_data_G = glob.glob( "/net/scratch_cms3a/daumann/HiggsDNA/v13_samples_2/dataG_v13/nominal/*.parquet")
+ files_DY_data_G = files_DY_data_G[:300]

+ # I think there is a problem with the samples above, let's test with the new samples!

else:

path_to_data = "/net/scratch_cms3a/daumann/nanoAODv12_Production/"
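The capped parquet reading above repeats the same three-line pattern per sample; a hedged sketch of a reusable version (the function name and the sorting are my own choices):

```python
import glob
import pandas as pd

def load_parquet_shards(pattern, max_files=300):
    # cap the number of shards so memory stays bounded while testing
    files = sorted(glob.glob(pattern))[:max_files]
    return pd.concat((pd.read_parquet(f) for f in files), ignore_index=True)
```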
@@ -255,7 +260,7 @@ def read_zee_data():
files_DY_data_G = files_DY_data_G[:30]

# merging the dataframes
- files_DY_data = [files_DY_data_G,files_DY_data_F]
+ files_DY_data = [files_DY_data_E,files_DY_data_G,files_DY_data_F]

data = [pd.read_parquet(f) for f in files_DY_data]
data_df = pd.concat(data,ignore_index=True)
@@ -269,11 +274,18 @@ def read_zee_data():
# The selection will be done in the perform_zee_selection() function
mask_data, mask_mc = perform_zee_selection( data_df, drell_yan_df )

+ # Switch to decide if reweighting is used or not
+ perform_reweighting = True

# now, due to differences in kinematics, a reweighting in the four kinematic variables [pt, eta, phi and rho] will be performed
mc_weights = drell_yan_df["weight"]
mc_weights_before = mc_weights
data_weights = np.ones( len( data_df["fixedGridRhoAll"] ) )
- data_weights, mc_weights = perform_zee_kinematics_reweighting(data_df[conditions_list][mask_data], data_weights[mask_data], drell_yan_df[conditions_list][mask_mc], mc_weights[mask_mc])

+ if( perform_reweighting ):
+     data_weights, mc_weights = perform_zee_kinematics_reweighting(data_df[conditions_list][mask_data], data_weights[mask_data], drell_yan_df[conditions_list][mask_mc], mc_weights[mask_mc])
+ else:
+     data_weights, mc_weights = np.array(data_weights[mask_data])/np.sum( np.array(data_weights[mask_data]) ), np.array(mc_weights[mask_mc])/np.sum(np.array(mc_weights[mask_mc]))

# now let's call a plotting function to plot the distributions that were read, for validation purposes
path_to_plots = "/net/scratch_cms3a/daumann/PhD/EarlyHgg/simulation_to_data_corrections/plot/validation_plots/"
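When the new switch is off, the hunk above only normalises each masked weight vector to unit sum, so data and MC can still be compared as shapes. A compact sketch of that fallback (the helper name is assumed):

```python
import numpy as np

def unit_normalise(weights, mask):
    # no kinematic reweighting: scale the selected weights to sum to one
    w = np.asarray(weights, dtype=float)[np.asarray(mask)]
    return w / w.sum()
```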
2 changes: 2 additions & 0 deletions main.py
@@ -51,6 +51,8 @@ def main():
corrections.setup_flow()
corrections.train_the_flow()

+ #exit()

# Now, we call the class that handles the transformations, training and validation of the corrections
#corrections = training_utils.Simulation_correction()
#corrections.setup_flow()
41 changes: 25 additions & 16 deletions normalizing_flows/training_utils.py
@@ -36,7 +36,7 @@ def __init__(self, configuration, n_transforms, n_splines_bins, aux_nodes, aux_l

# Checking if cuda is available
print('Checking cuda availability: ', torch.cuda.is_available())
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ device = torch.device('cpu') # force CPU even when CUDA is available
self.device = device

#reading important tensors
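If the CPU/GPU choice should stay switchable rather than hard-coded, one possible refactor (a suggestion, not code from this commit):

```python
import torch

def pick_device(force_cpu: bool = True) -> torch.device:
    # explicit toggle instead of a conditional whose branches are identical
    if force_cpu or not torch.cuda.is_available():
        return torch.device('cpu')
    return torch.device('cuda')
```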
@@ -158,15 +158,15 @@ def train_the_flow(self):
# load the trained model and perform the final plots!
def evaluate_results(self):

- # Results are evaluated in the test dataset, never seen by the neural network!
- # A mask is applied so only MC events are chosen from the data space
- MaskOnlyvalidationMC = self.validation_conditions[:,self.validation_conditions.size()[1] -1 ] == 0
- self.validation_conditions = self.validation_conditions[MaskOnlyvalidationMC]
- self.validation_inputs = self.validation_inputs[MaskOnlyvalidationMC]
- self.validation_weights = self.validation_weights[MaskOnlyvalidationMC]

with torch.no_grad():

+ # Results are evaluated in the test dataset, never seen by the neural network!
+ # A mask is applied so only MC events are chosen from the data space
+ MaskOnlyvalidationMC = self.validation_conditions[:,self.validation_conditions.size()[1] -1 ] == 0
+ self.validation_conditions = self.validation_conditions[MaskOnlyvalidationMC]
+ self.validation_inputs = self.validation_inputs[MaskOnlyvalidationMC]
+ self.validation_weights = self.validation_weights[MaskOnlyvalidationMC]

trans = self.flow(self.mc_test_conditions).transform
sim_latent = trans( self.mc_test_inputs )
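These two transform calls implement the usual latent-space morphing with a conditional flow: encode MC under MC conditions, then decode under data-like conditions. The lines elided here presumably flip the is-data condition before trans2 is built; a sketch of the overall pattern, assuming a zuko-style flow(conditions).transform API with the flag in the last condition column:

```python
import torch

def morph_mc_to_data(flow, mc_inputs, mc_conditions):
    with torch.no_grad():
        # encode MC events into the latent space under MC conditions
        latent = flow(mc_conditions).transform(mc_inputs)
        # assumption: the last condition column is the is-data flag
        data_conditions = mc_conditions.clone()
        data_conditions[:, -1] = 1.0
        # decode under data-like conditions -> corrected MC
        return flow(data_conditions).transform.inv(latent)
```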

@@ -175,6 +175,13 @@ def evaluate_results(self):
trans2 = self.flow(self.mc_test_conditions).transform
self.samples = trans2.inv( sim_latent)

+ # let's save the means and stds used for the transformations
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'input_means.npy', self.input_mean_for_std)
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'input_std.npy' , self.input_std_for_std )

+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'conditions_means.npy', self.condition_mean_for_std)
+ np.save( os.getcwd() + '/results/' +self.configuration + '/' + 'conditions_std.npy' , self.condition_std_for_std )

# now, let's invert the transformations
self.invert_transformation()
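Saving the standardisation statistics lets the exact training-time scaling be reapplied when the correction is used elsewhere; a small sketch of the reload side (the paths mirror the np.save calls above, the helper itself is illustrative):

```python
import os
import numpy as np

def load_input_standardisation(configuration):
    base = os.getcwd() + '/results/' + configuration + '/'
    mean = np.load(base + 'input_means.npy')
    std = np.load(base + 'input_std.npy')
    # apply as x_std = (x - mean) / std, matching the training transform
    return mean, std
```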

@@ -218,11 +225,11 @@ def perform_transformation(self):

# since hoe has very low values, the shift value (the value below which triangular events are sampled) must be different here
if( index == 6 ):
- self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], b = 0.001) )
- self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index] , b= 0.001) )
+ self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], self.device , b = 0.001) )
+ self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index], self.device , b= 0.001) )
else:
- self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index]) )
- self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index]) )
+ self.vector_for_iso_constructors_data.append( Make_iso_continuous(self.data_training_inputs[:,index], self.device ) )
+ self.vector_for_iso_constructors_mc.append( Make_iso_continuous(self.mc_training_inputs[:,index], self.device ) )

# now really applying the transformations
counter = 0
@@ -267,7 +274,7 @@ def perform_transformation(self):
print( self.condition_std_for_std )
exit()
"""

# transforming the training tensors
self.training_inputs = ( self.training_inputs - self.input_mean_for_std )/self.input_std_for_std
self.training_conditions[:,:-1] = ( self.training_conditions[:,:-1] - self.condition_mean_for_std )/self.condition_std_for_std
@@ -366,8 +373,10 @@ def read_saved_tensor(self):

# this is the class responsible for the isolation variables transformation
class Make_iso_continuous:
- def __init__(self, tensor, b = False):
+ def __init__(self, tensor, device, b = False):

+ self.device = device

#tensor = tensor.cpu()
self.iso_bigger_zero = tensor > 0
self.iso_equal_zero = tensor == 0
@@ -393,7 +402,7 @@ def shift_and_sample(self, tensor):
# now a log transform is applied on top of the smoothing to stretch the events in the zero-peak triangle and "kill" the iso tails
tensor = torch.log( 1e-3 + tensor )

- return tensor.to('cuda')
+ return tensor.to(self.device)

#inverse operation of the above shift_and_sample transform
def inverse_shift_and_sample( self,tensor , processed = False):
@@ -414,4 +423,4 @@ def inverse_shift_and_sample( self,tensor , processed = False):
assert (abs(torch.sum( self.before_transform - tensor )) < tensor.size()[0]*1e-6 )
#added the tensor.size()[0]*1e-6 term due to possible numerical fluctuations!

- return tensor.to('cuda')
+ return tensor.to(self.device)
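For context, Make_iso_continuous smooths isolation-like variables whose distributions have a discrete spike at zero: zeros are spread over a small wedge below the shift value b, genuine values are shifted up by b, and a log compresses the tail; the inverse undoes the log and collapses the wedge back to zero. A self-contained sketch of that idea (the triangular shape and the b default are assumptions; the class's exact bookkeeping differs):

```python
import torch

def smooth_iso(iso, b=0.05):
    # spread the spike at zero over (0, b) with a triangular density
    # peaked at zero, and shift genuine values above b
    out = iso.clone().float()
    zero = iso == 0
    u = torch.rand(int(zero.sum()))
    out[zero] = b * (1.0 - torch.sqrt(u))  # triangular samples in (0, b)
    out[~zero] = out[~zero] + b            # keep real values separated
    # the log stretches the wedge and tames the isolation tail
    return torch.log(1e-3 + out)

def unsmooth_iso(smoothed, b=0.05):
    # approximate inverse: undo the log, zero the wedge, unshift the rest
    x = torch.exp(smoothed) - 1e-3
    return torch.where(x < b, torch.zeros_like(x), x - b)
```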
12 changes: 6 additions & 6 deletions plot/plot_utils.py
@@ -225,13 +225,13 @@ def plot_distributions_for_tensors( data_tensor, mc_tensor, flow_samples, mc_wei
std = np.std( np.array(data_tensor[:,i]) )

if( 'Iso' in str(var_list[i]) or 'DR' in str(var_list[i]) or 'esE' in str(var_list[i]) or 'hoe' in str(var_list[i]) or 'energy' in str(var_list[i]) ):
- data_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
- mc_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
- mc_rw_hist = hist.Hist(hist.axis.Regular(45, 0.0 , mean + 2.0*std))
+ data_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
+ mc_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
+ mc_rw_hist = hist.Hist(hist.axis.Regular(50, 0.0 , mean + 2.0*std))
else:
- data_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
- mc_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
- mc_rw_hist = hist.Hist(hist.axis.Regular(45, mean - 2.5*std, mean + 2.5*std))
+ data_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))
+ mc_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))
+ mc_rw_hist = hist.Hist(hist.axis.Regular(50, mean - 2.5*std, mean + 2.5*std))

if( 'DR04' in str(var_list[i]) ):
data_hist = hist.Hist(hist.axis.Regular(50, 0.0 , 5.0))
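The binning bump above (45 to 50 bins) uses scikit-hep's hist; a minimal sketch of how the data-driven window and the weighted fill go together (the helper name is mine):

```python
import hist
import numpy as np

def make_validation_hist(values, weights, positive_only=False, nbins=50):
    mean, std = np.mean(values), np.std(values)
    # isolation-like variables start at zero; others get a symmetric window
    if positive_only:
        lo, hi = 0.0, mean + 2.0 * std
    else:
        lo, hi = mean - 2.5 * std, mean + 2.5 * std
    h = hist.Hist(hist.axis.Regular(nbins, lo, hi))
    h.fill(values, weight=weights)
    return h
```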
