uniovi-hepex · folguera · Dec 16, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -44,7 +44,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/var/folders/08/4qyj2h792lv7kk8b3phm_j000000gn/T/ipykernel_71433/1996300641.py:26: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "/var/folders/08/4qyj2h792lv7kk8b3phm_j000000gn/T/ipykernel_90950/1996300641.py:26: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
       "  graph = torch.load(file_path)\n"
      ]
     },
@@ -105,37 +105,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total Graphs: 507314\n",
-      "Total Graphs after filtering: 502362\n",
-      "Total Graphs after filtering: 502362\n",
-      "====================================\n",
-      "Example of data:\n",
-      "<class 'torch_geometric.data.data.Data'>\n",
-      "tensor([[1.0005e+00, 2.4179e+00, 4.3113e+02, 0.0000e+00, 3.0000e+00],\n",
-      "        [1.1310e+00, 2.3166e+00, 4.1368e+02, 1.0000e+01, 5.0000e+00],\n",
-      "        [1.0657e+00, 2.3225e+00, 4.4868e+02, 1.1000e+01, 5.0000e+00],\n",
-      "        [1.1092e+00, 2.3469e+00, 1.1241e+03, 7.0000e+00, 9.0000e+00],\n",
-      "        [1.0766e+00, 2.3318e+00, 1.1015e+03, 1.6000e+01, 5.0000e+00]],\n",
-      "       dtype=torch.float64)\n",
-      "tensor([[0, 0, 1, 1, 2, 2, 3, 4],\n",
-      "        [1, 2, 0, 2, 0, 1, 4, 3]])\n",
-      "tensor([[-0.1012,  0.1305],\n",
-      "        [-0.0954,  0.0653],\n",
-      "        [-0.1012,  0.1305],\n",
-      "        [ 0.0058, -0.0653],\n",
-      "        [-0.0954,  0.0653],\n",
-      "        [ 0.0058, -0.0653],\n",
-      "        [-0.0151, -0.0326],\n",
-      "        [-0.0151, -0.0326]])\n",
-      "tensor(-0.0836)\n",
-      "====================================\n"
+     "ename": "NameError",
+     "evalue": "name 'graphs' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch_geometric\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtransforms\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mT\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m Graphs_for_training \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msum\u001b[39m(\u001b[43mgraphs\u001b[49m, [])\n\u001b[1;32m      4\u001b[0m Graphs_for_training_reduced \u001b[38;5;241m=\u001b[39m Graphs_for_training\n\u001b[1;32m      5\u001b[0m Graphs_for_training_filtered \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m      6\u001b[0m     g \u001b[38;5;28;01mfor\u001b[39;00m g \u001b[38;5;129;01min\u001b[39;00m Graphs_for_training_reduced\n\u001b[1;32m      7\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(g\u001b[38;5;241m.\u001b[39my)\u001b[38;5;241m.\u001b[39many() \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misnan(g\u001b[38;5;241m.\u001b[39mx)\u001b[38;5;241m.\u001b[39many())  \u001b[38;5;129;01mand\u001b[39;00m g\u001b[38;5;241m.\u001b[39medge_index\u001b[38;5;241m.\u001b[39msize(\u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m      8\u001b[0m ]\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'graphs' is not defined"
      ]
     }
    ],
@@ -178,7 +159,18 @@
     "print(Graphs_for_training_filtered[0].edge_index)\n",
     "print(Graphs_for_training_filtered[0].edge_attr)\n",
     "print(Graphs_for_training_filtered[0].y)\n",
-    "print(\"====================================\")"
+    "print(\"====================================\")\n",
+    "\n",
+    "# Save data (not normalized) for later use\n",
+    "events = len(Graphs_for_training_filtered)\n",
+    "ntrain = int((events * 0.7) / BatchSize) * BatchSize  # to have full batches\n",
+    "print(f\"Training events: {ntrain}\")\n",
+    "\n",
+    "train_dataset_noNORM = Graphs_for_training_filtered[:ntrain]\n",
+    "test_dataset_noNORM = Graphs_for_training_filtered[ntrain:ntrain * 2]\n",
+    "\n",
+    "train_loader_noNORM = DataLoader(train_dataset_noNORM, batch_size=BatchSize, shuffle=True)\n",
+    "test_loader_noNORM = DataLoader(test_dataset_noNORM, batch_size=BatchSize, shuffle=False)\n"
    ]
   },
   {
@@ -301,7 +293,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -355,6 +347,7 @@
     "        break  # Only draw the first batch\n",
     "\n",
     "# Plot histograms for training data\n",
+    "plot_histograms(train_loader_noNORM)\n",
     "plot_histograms(train_loader)"
    ]
   },
@@ -665,13 +658,6 @@
     "    torch.save(model_MPNNRegressor.state_dict(), f'{ModelOutDIR}model_MPNNRegressor_{BatchSize}batches_{num_epochs}epochs_4MPL_8Lins.pth')"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -794,12 +780,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "def plot_results(all_regression, all_prediction, label='Model'):\n",
-    "    fig, axs = plt.subplots(1, 2, figsize=(15, 5))\n",
+    "    fig, axs = plt.subplots(1, 3, figsize=(15, 5))\n",
     "\n",
     "    print(\"Plotting Regression target\")\n",
     "    axs[0].hist(all_regression, bins=np.arange(-0.5,0,0.006), alpha=0.75, label='Regression target')\n",
@@ -810,12 +796,27 @@
     "    axs[0].legend()\n",
     "\n",
     "    axs[1].scatter(all_regression, all_prediction, alpha=0.5)\n",
-    "    axs[1].set_xlim(-0.5,0)\n",
-    "    plt.plot([min(all_prediction), max(all_prediction)], [min(all_prediction), max(all_prediction)], color='red', linestyle='--') # Line of equality\n",
+    "    axs[1].plot([min(all_prediction), max(all_prediction)], [min(all_prediction), max(all_prediction)], color='red', linestyle='--') # Line of equality\n",
     "    axs[1].set_title(f'Regression target vs prediction for {label}')\n",
     "    axs[1].set_xlabel('Regression target')\n",
     "    axs[1].set_ylabel('Prediction')\n",
     "\n",
+    "    axs[2].hist(all_prediction - all_regression, bins=30, alpha=0.75)\n",
+    "    axs[2].set_title(f'Residuals for {label}')\n",
+    "    axs[2].set_xlabel('Residual')\n",
+    "    axs[2].set_ylabel('Frequency')\n",
+    "    \n",
+    "    # Calculate the bias and resolution and plot them in the graph\n",
+    "    bias = np.mean(all_prediction - all_regression)\n",
+    "    resolution = np.std(all_prediction - all_regression)\n",
+    "\n",
+    "    # Add text box with bias and resolution\n",
+    "    textstr = f'Bias: {bias:.4f}\\nResolution: {resolution:.4f}'\n",
+    "    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)\n",
+    "    axs[1].text(0.95, 0.95, textstr, transform=axs[1].transAxes, fontsize=12,\n",
+    "                verticalalignment='top', horizontalalignment='right', bbox=props)\n",
+    "\n",
+    "\n",
     "    plt.tight_layout()\n",
     "    plt.show()\n",
     "\n"

@@ -0,0 +1,9 @@
+digraph {
+	graph [size="12,12"]
+	node [align=left fontname=monospace fontsize=10 height=0.2 ranksep=0.1 shape=box style=filled]
+	2276440280112 [label="
+ (1024)" fillcolor=darkolivegreen1]
+	2276440280592 [label="
+ (1024, 1)" fillcolor=darkolivegreen3]
+	2276440280592 -> 2276440280112 [style=dotted]
+}
@@ -0,0 +1,109 @@
+import os,sys
+
+print('START\n')
+########   YOU ONLY NEED TO FILL THE AREA BELOW   #########
+########   customization  area #########
+# Base folder of the input graphs; each entry of InputGraphs is appended to it
+# to form the --graph_path argument.
+GraphFolder = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Graphs_v240725_241106/" # base directory, not a list
+# Folder holding the model files passed through --model_path.
+ModelFolder = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Model_Graphsv240725_QOverPtRegression_241203/"
+# One command/job is produced per (model, normalization, input graph) combination.
+ModelTypes = ['SAGE', 'MPNN']
+NormalizationTypes = ['DropLastTwoNodeFeatures', 'NodesAndEdgesAndOnlySpatial']
+# Sub-folders of GraphFolder; an entry containing "all" is tagged
+# "allConnections", any other entry is tagged "3neighbours".
+InputGraphs = ["3neighbours_muonQOverPt/", "all_connections_muonQOverPt/"]
+# Forwarded as --graph_name.
+GraphName = "vix_graph_6Nov"
+# Forwarded as --epochs.
+# NOTE(review): the model file name below hard-codes "50epochs" and does not
+# follow this value — confirm before changing it.
+Epochs = 50
+# Forwarded as --output_dir; created by this script when it does not exist.
+OutputDir = "/eos/user/f/folguera/www/INTREPID/2024_12_04_GNN_QOverPtRegression/"
+# True: only print the commands and exit. False: write the job scripts and
+# submit them with condor_submit.
+JustPrint = True
+########   customization end   #########
+
+
+if JustPrint:
+    # Dry-run mode: print one ready-to-paste command per
+    # (model, normalization, input graph) combination, then exit before any
+    # job script is written or submitted.
+    print("##########################")
+    print("source pyenv/bin/activate\n")
+
+    if not os.path.exists(OutputDir):
+        print("OutputDir %s does not exist" %(OutputDir))
+        # Create the directory from Python (missing parents included) instead
+        # of a shell `mkdir`, whose failure was silently ignored and whose
+        # unquoted argument breaks on paths containing spaces.
+        os.makedirs(OutputDir, exist_ok=True)
+
+    for model in ModelTypes:
+        for normalization in NormalizationTypes:
+            for input_graph in InputGraphs:
+                # NOTE(review): "Bsize64_lr5e-4", "20files" and "50epochs" are
+                # fixed text and do not follow --batch_size/--learning_rate/
+                # --num_files/--epochs below; presumably they must match the
+                # names of the files stored in ModelFolder — confirm.
+                connectivity = "allConnections" if "all" in input_graph else "3neighbours"
+                SaveTag = model + "_" + normalization + "_Bsize64_lr5e-4_241106_20files_" + connectivity
+                ModelFile = f'model_{model}_32dim_50epochs_{SaveTag}.pth'
+
+                # The trailing '&' makes each pasted command run in the background.
+                # NOTE(review): the batch-job command written later in this
+                # script also passes --plot_graph_features; this one does not —
+                # confirm the difference is intended.
+                print("python tools/training/TrainModelFromGraph.py --model_type %s --hidden_dim 32 --normalization %s --graph_path %s --output_dir %s --do_validation --save_tag %s --batch_size 1024 --learning_rate 0.001 --num_files 5 --graph_name %s --epochs %d --model_path %s/%s &\n" %(model, normalization, GraphFolder+input_graph, OutputDir, SaveTag, GraphName, Epochs, ModelFolder,ModelFile))
+
+    print("##########################")
+    sys.exit()
+
+### NOW SUBMIT THE JOBS
+import shutil  # only needed on the submission path, so imported here
+
+queue = "espresso"
+WORKDIR = "/afs/cern.ch/user/f/folguera/workdir/INTREPID/tmp/PlotModel/"
+
+# The generated job scripts `cd` back to the directory this script was started from.
+path = os.getcwd()
+print('do not worry about folder creation:\n')
+# Start from an empty scratch area. A missing WORKDIR is not an error, which
+# matches the former `rm -rf`.
+shutil.rmtree(WORKDIR, ignore_errors=True)
+# os.makedirs also creates WORKDIR and any missing parent. The former shell
+# `mkdir` calls failed silently when a parent was absent, which only surfaced
+# later as an error when opening the first job script.
+for subdir in ("exec", "batchlogs"):
+    os.makedirs(os.path.join(WORKDIR, subdir), exist_ok=True)
+
+if not os.path.exists(OutputDir):
+    print("OutputDir %s does not exist" %(OutputDir))
+    os.makedirs(OutputDir, exist_ok=True)
+else :
+    print("Warning: OutputDir already exists. It will be overwritten\n")
+    print("OutputDir: %s" %(OutputDir))
+
+
+# Write one executable shell script per (model, normalization, input graph)
+# combination into WORKDIR/exec; file_count numbers the scripts from 01.
+file_count = 0
+for model in ModelTypes:
+    for normalization in NormalizationTypes:
+        for input_graph in InputGraphs:
+            file_count += 1
+            # NOTE(review): "Bsize64_lr5e-4", "20files" and "50epochs" are
+            # fixed text and do not follow the CLI values below; presumably
+            # they must match the names of the files in ModelFolder — confirm.
+            SaveTag = model + "_" + normalization + "_Bsize64_lr5e-4_241106_20files_"
+            if "all" in input_graph:
+                SaveTag = SaveTag + "allConnections"
+            else:
+                SaveTag = SaveTag + "3neighbours"
+            ModelFile = f'model_{model}_32dim_50epochs_{SaveTag}.pth'
+
+            # Compute the script path once so writing it and making it
+            # executable cannot drift apart.
+            job_script = '%s/exec/job_plot_model_%02d.sh' %(WORKDIR, file_count)
+            with open(job_script, 'w') as fout:
+                fout.write("#!/bin/sh\n")
+                fout.write("echo\n")
+                fout.write("echo\n")
+                fout.write("echo 'START---------------'\n")
+                fout.write("echo 'WORKDIR ' ${PWD}\n")
+                # Run from the directory this script was launched in, so the
+                # relative pyenv/ and tools/ paths resolve.
+                fout.write("cd "+str(path)+"\n")
+                fout.write("source pyenv/bin/activate\n")
+                fout.write("echo 'Saving Model in  %s' \n" %(OutputDir))
+                fout.write("python tools/training/TrainModelFromGraph.py --model_type %s --hidden_dim 32 --normalization %s --graph_path %s --output_dir %s --plot_graph_features --do_validation --save_tag %s --batch_size 1024 --learning_rate 0.001 --num_files 5 --graph_name %s --epochs %d --model_path %s/%s\n" %(model, normalization, GraphFolder+input_graph, OutputDir, SaveTag, GraphName, Epochs, ModelFolder,ModelFile))
+                fout.write("echo 'STOP---------------'\n")
+                fout.write("echo\n")
+                fout.write("echo\n")
+            # rwxr-xr-x, set from Python so a failure raises instead of being
+            # dropped with the ignored return code of a shell `chmod`.
+            os.chmod(job_script, 0o755)
+
+###### create submit.sub file ####
+# HTCondor submit description: one job is queued for every script matching
+# WORKDIR/exec/job_*sh, with stdout/stderr/log collected under WORKDIR/batchlogs.
+submit_lines = [
+    "executable              = $(filename)\n",
+    "arguments               = $(ClusterId)$(ProcId)\n",
+    "output                  = %s/batchlogs/$(ClusterId).$(ProcId).out\n" %(WORKDIR),
+    "error                   = %s/batchlogs/$(ClusterId).$(ProcId).err\n" %(WORKDIR),
+    "log                     = %s/batchlogs/$(ClusterId).log\n" %(WORKDIR),
+    '+JobFlavour = "%s"\n' %(queue),
+    "\n",
+    "queue filename matching (%s/exec/job_*sh)\n" %(WORKDIR),
+]
+with open('submit.sub', 'w') as fout:
+    fout.writelines(submit_lines)
+
+###### submit the jobs to HTCondor ######
+# NOTE(review): this echoes the literal text "submit.sub", not the contents of
+# the file; if showing the submit description was the intent, `cat submit.sub`
+# would be needed — confirm.
+os.system("echo submit.sub")
+# Hand the submit description written above to HTCondor; the return code is not checked.
+os.system("condor_submit submit.sub")
+
+print()
+print("your jobs:")
+# Show the queue so the freshly submitted jobs can be checked.
+os.system("condor_q")
+print()
+print('END')
+print()
@@ -4,10 +4,15 @@
 print('START\n')
 ########   YOU ONLY NEED TO FILL THE AREA BELOW   #########
 ########   customization  area #########
-InputFolder = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Graphs_v240725_241106/3neighbours_muonQOverPt/" # list with all the file directories
+InputFolder = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Graphs_v240725_241106/" # list with all the file directories
 queue = "workday" # give bsub queue -- 8nm (8 minutes), 1nh (1 hour), 8nh, 1nd (1day), 2nd, 1nw (1 week), 2nw
-OutputDir = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Model_v240725_Bsize64_lr5e-4_NOnormNodes_GAT_241106/"
+OutputDir = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Model_Graphsv240725_QOverPtRegression_241203/"
 WORKDIR = "/afs/cern.ch/user/f/folguera/workdir/INTREPID/tmp/TrainingModel/"
+ModelTypes = ['SAGE', 'MPNN']
+NormalizationTypes = ['DropLastTwoNodeFeatures', 'NodesAndEdgesAndOnlySpatial']
+InputGraphs = ["3neighbours_muonQOverPt/", "all_connections_muonQOverPt/"]
+GraphName = "vix_graph_6Nov"
+Epochs = 50
 ########   customization end   #########
 
 path = os.getcwd()
@@ -30,21 +35,32 @@
 print("OutputDir: %s" %(OutputDir))
 
 ##### creating job #####
-
-with open('%s/exec/job_train_model.sh' %(WORKDIR), 'w') as fout:
-    fout.write("#!/bin/sh\n")
-    fout.write("echo\n")
-    fout.write("echo\n")
-    fout.write("echo 'START---------------'\n")
-    fout.write("echo 'WORKDIR ' ${PWD}\n")
-    fout.write("cd "+str(path)+"\n")
-    fout.write("source pyenv/bin/activate\n")
-    fout.write("echo 'Saving Model in  %s' \n" %(OutputDir))
-    fout.write("python tools/training/TrainModelFromGraph.py --graph_path %s --out_path %s --do_train \n" %(InputFolder, OutputDir))  
-    fout.write("echo 'STOP---------------'\n")
-    fout.write("echo\n")
-    fout.write("echo\n")
-os.system("chmod 755 %s/exec/job_train_model.sh" %(WORKDIR))
+# Write one executable shell script per (model, normalization, input graph)
+# combination into WORKDIR/exec; file_count numbers the scripts from 01.
+file_count = 0
+for model in ModelTypes:
+    for normalization in NormalizationTypes:
+        for input_graph in InputGraphs:
+            file_count += 1
+            print("Creating job for model %s with normalization %s and input graphs %s" %(model, normalization, input_graph))
+            # NOTE(review): the tag text says "Bsize64_lr5e-4" and "20files"
+            # while the command below passes --batch_size 1024 and
+            # --learning_rate 0.001; other tooling may rebuild file names from
+            # this exact tag, so confirm before renaming it.
+            SaveTag = model + "_" + normalization + "_Bsize64_lr5e-4_241106_20files_"
+            if "all" in input_graph:
+                SaveTag = SaveTag + "allConnections"
+            else:
+                SaveTag = SaveTag + "3neighbours"
+
+            # Compute the script path once so writing it and making it
+            # executable cannot drift apart.
+            job_script = '%s/exec/job_train_model_%02d.sh' %(WORKDIR, file_count)
+            with open(job_script, 'w') as fout:
+                fout.write("#!/bin/sh\n")
+                fout.write("echo\n")
+                fout.write("echo\n")
+                fout.write("echo 'START---------------'\n")
+                fout.write("echo 'WORKDIR ' ${PWD}\n")
+                # Run from the directory this script was launched in, so the
+                # relative pyenv/ and tools/ paths resolve.
+                fout.write("cd "+str(path)+"\n")
+                fout.write("source pyenv/bin/activate\n")
+                fout.write("echo 'Saving Model in  %s' \n" %(OutputDir))
+                fout.write("python tools/training/TrainModelFromGraph.py --model_type %s --hidden_dim 32 --normalization %s --graph_path %s --out_path %s --do_train --save_tag %s --batch_size 1024 --learning_rate 0.001 --num_files 20 --graph_name %s --epochs %d\n" %(model, normalization, InputFolder+input_graph, OutputDir, SaveTag, GraphName, Epochs))
+                fout.write("echo 'STOP---------------'\n")
+                fout.write("echo\n")
+                fout.write("echo\n")
+            # rwxr-xr-x, set from Python so a failure raises instead of being
+            # dropped with the ignored return code of a shell `chmod`.
+            os.chmod(job_script, 0o755)
 
 ###### create submit.sub file ####
 with open('submit.sub', 'w') as fout: