eic · rahmans1 · Dec 10, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/benchmarks/output_branch_size_scan/config.yml b/benchmarks/output_branch_size_scan/config.yml
@@ -0,0 +1,26 @@
+# CI jobs for the output branch size scan: simulate/reconstruct a sample,
+# dump and compare its per-branch sizes, then run a final collect step.
+sim:output_branch_size_scan:
+  stage: simulate
+  extends: .det_benchmark
+  script:
+    # NOTE(review): assumes the job runs from the directory holding generate.sh - confirm.
+    - bash generate.sh
+
+bench:output_branch_size_scan:
+  stage: benchmarks
+  extends: .det_benchmark
+  needs:
+    - ["sim:output_branch_size_scan"]
+  script:
+    - bash output_branch_size_scan.sh
+
+results:output_branch_size_scan:
+  stage: collect
+  extends: .det_benchmark
+  needs:
+    - ["bench:output_branch_size_scan"]
+  script:
+    # Nothing is collected yet; a job must still define a non-empty script,
+    # so just list the working directory.
+    - ls -lrht
diff --git a/benchmarks/output_branch_size_scan/generate.sh b/benchmarks/output_branch_size_scan/generate.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+set -Euo pipefail
+trap 's=$?; echo "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR
+IFS=$'\n\t'
+
+NUM_EVENTS=400
+INPUT_FILE=root://dtn-eic.jlab.org//work/eic2/EPIC/EVGEN/DIS/NC/18x275/minQ2=1/pythia8NCDIS_18x275_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.hepmc3.tree.root
+
+DETECTOR_CONFIG=epic_craterlake
+EBEAM=18
+PBEAM=275
+
+npsim \
+    --runType batch \
+    --random.seed 1 \
+    --random.enableEventSeed \
+    --printLevel WARNING \
+    --skipNEvents 0 \
+    --numberOfEvents 400 \
+    --filter.tracker 'edep0' \
+    --hepmc3.useHepMC3 true \
+    --compactFile ${DETECTOR_PATH}/${DETECTOR_CONFIG}${EBEAM:+${PBEAM:+_${EBEAM}x${PBEAM}}}.xml \
+    --inputFiles ${INPUT_FILE} \
+    --outputFile current_campaign.edm4hep.root
+
+eicrecon \
+    -Ppodio:output_file="current_campaign.eicrecon.tree.edm4eic.root" \
+    -Pjana:warmup_timeout=0 -Pjana:timeout=0 \
+    -Pplugins=janadot \
+    "current_campaign.edm4hep.root"
+
+
diff --git a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+root -q -b eicrecon.tree.edm4eic.root -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_current.txt
+root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/<default_file> -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_default.txt
+python plot.py -c branch_size_current.txt -d branch_size_default.txt
diff --git a/benchmarks/output_branch_size_scan/plot.py b/benchmarks/output_branch_size_scan/plot.py
@@ -0,0 +1,63 @@
+import pandas as pd
+import argparse
+import matplotlib.pyplot as plt
+
+parser = argparse.ArgumentParser(prog='Plot output branch sizes', description='Plot output branch sizes')
+
+parser.add_argument("-c", dest="current_campaign_file", action="store", required=True, help="Enter the current campaign file")
+parser.add_argument("-d", dest="default_file", action="store", required=True, help="Enter the default file")
+
+args=parser.parse_args()
+
+
+campaign1=args.current_campaign_file
+campaign2=args.default_file
+
+
+# Load the data from the CSV file
+df1 = pd.read_csv(campaign1+'.txt', header=None)
+df2 = pd.read_csv(campaign2+'.txt', header=None)
+
+# Plot the third column ('Value') against the first column ('Object')
+plt.figure(figsize=(10,6))
+plt.scatter(df1.iloc[:,0], df1.iloc[:,2])
+plt.scatter(df2.iloc[:,0], df2.iloc[:,2])
+
+plt.title("Branch Sizes (Bytes) vs Branch Names")
+
+
+
+
+# Show the figure
+plt.tight_layout()
+plt.yscale('log')
+plt.savefig(campaign1+'_vs_'+campaign2+'.png')
+
+print(df1)
+print(df2)
+
+# Assuming both dataframes have the same structure and the first column is branch name
+# Merge the two dataframes on the branch name (first column)
+merged_df = pd.merge(df1.iloc[:, [0, 2]], df2.iloc[:, [0, 2]], on=df1.columns[0], suffixes=('_' + campaign1, '_' + campaign2))
+
+# Create a new column that calculates the difference between the third columns of the two DataFrames
+merged_df['Difference'] = merged_df.iloc[:, 1] - merged_df.iloc[:, 2]
+
+# Create a new DataFrame with the branch names and the difference
+result_df = merged_df[[df1.columns[0], 'Difference']]
+
+# Display the resulting DataFrame
+print(result_df)
+
+# Sort the DataFrame by the absolute value of the difference in descending order
+sorted_df = result_df.reindex(result_df['Difference'].abs().sort_values(ascending=False).index)
+
+# Pick the top 10 branches with the largest differences
+top_20_branches = sorted_df.head(20)
+
+# Display the top 10 branches
+print(top_20_branches)
+
+
+# Optionally, save it to a new CSV file
+sorted_df.to_csv(f"{campaign1}_vs_{campaign2}_difference.csv", index=False)