yegor256 · yegor256 · Dec 12, 2024 · Dec 7, 2024 · Dec 7, 2024 · Dec 7, 2024
diff --git a/steps/aggregate.sh b/steps/aggregate.sh
@@ -59,28 +59,6 @@ printf "\n" >> "${all}"
 echo "All $(wc -l "${all}" | xargs) projects aggregated$("${LOCAL}/help/tdiff.sh" "${start}")"
 printf "\n"
 
-mkdir -p "${TARGET}/data/aggregation"
-
-jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt
-rm -rf "${jobs}"
-mkdir -p "$(dirname "${jobs}")"
-touch "${jobs}"
-
-for metric in ${metrics}; do
-    metric_file="${TARGET}/data/${metric}.csv"
-    if [[ -f "${metric_file}" ]]; then
-        output_folder="${TARGET}/data/aggregation"
-        for sh_script in "${LOCAL}/steps/aggregation-functions/"*.sh; do
-            if [[ -f "${sh_script}" ]]; then
-                printf "%s %s %s %s\n" "${sh_script@Q}" "${metric_file}" "${output_folder@Q}" "${metric@Q}" >> "${jobs}"
-            fi
-        done
-    fi
-done
-
-"${LOCAL}/help/parallel.sh" "${jobs}"
-wait
-
 jobs=${TARGET}/temp/jobs/aggregate-join-jobs.txt
 rm -rf "${jobs}"
 mkdir -p "$(dirname "${jobs}")"
@@ -98,4 +76,24 @@ done < "${repos}"
 "${LOCAL}/help/parallel.sh" "${jobs}"
 wait
 
+mkdir -p "${TARGET}/data/aggregation"
+f_jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt
+rm -rf "${f_jobs}"
+mkdir -p "$(dirname "${f_jobs}")"
+touch "${f_jobs}"
+
+for metric in ${metrics}; do
+    metric_file="${TARGET}/data/${metric}.csv"
+    if [[ -f "${metric_file}" ]]; then
+        output_folder="${TARGET}/data/aggregation"
+        for sh_script in "${LOCAL}/steps/aggregation-functions/"*.sh; do
+            if [[ -f "${sh_script}" ]]; then
+                printf "%s %s %s %s\n" "${sh_script@Q}" "${metric_file}" "${output_folder@Q}" "${metric@Q}" >> "${f_jobs}"
+            fi
+        done
+    fi
+done
+"${LOCAL}/help/parallel.sh" "${f_jobs}"
+wait
+
 echo "All metrics aggregated and joined in ${total} repositories$("${LOCAL}/help/tdiff.sh" "${start}")"
diff --git a/steps/report.sh b/steps/report.sh
@@ -20,6 +20,10 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+
+# shellcheck disable=SC2128
+# Disabled on purpose: expanding the array without an index yields its first element, which is exactly what this script needs
+
 set -e
 set -o pipefail
 
@@ -57,6 +61,65 @@ fi
 
 sort -o "${list}" "${list}"
 
+# Create the aggregation table LaTeX file
+aggregation_table=${TARGET}/temp/aggregation_table.tex
+rm -f "${aggregation_table}"
+touch "${aggregation_table}"
+
+# Print $1 with LaTeX specials escaped: backslash first (so added escapes are not re-escaped), then & % $ # _ { }, then ^ and ~
+latex_escape() {
+  printf '%s\n' "$1" | sed -e 's/\\/\\textbackslash /g' -e 's/[&%$#_{}]/\\&/g' -e 's/\^/\\textasciicircum{}/g' -e 's/~/\\textasciitilde{}/g'
+}
+
+{
+  printf "\onecolumn\n"
+  printf "\\centering\n"
+  printf "\\\\begin{longtable}{|l|c|c|c|}\n"
+  printf "\\hline\n"
+  printf "Metric & 90th Percentile & Mean & Median \\\\\\\\\\\\\ \n"
+  printf "\\hline\n"
+} >> "${aggregation_table}"
+
+files=("${TARGET}/data/aggregation/*.csv")
+
+if compgen -G "${files}" > /dev/null; then
+    # Process each CSV file in the aggregation directory
+    for file in ${files}; do
+        # Extract the metric name (e.g., AHF from AHF.90th_percentile.csv)
+        metric=$(basename "${file}" | cut -d '.' -f 1)
+
+        # Extract values from the CSV file
+        value=$(<"${file}")
+
+        # Check which aggregation type this file corresponds to and store it accordingly
+        if [[ "${file}" =~ \.90th_percentile\.csv$ ]]; then
+            percentile="${value}"
+            mean=""
+            median=""
+        elif [[ "${file}" =~ \.mean\.csv$ ]]; then
+            mean="${value}"
+        elif [[ "${file}" =~ \.median\.csv$ ]]; then
+            median="${value}"
+        fi
+
+        # Sanitize the values before inserting into the LaTeX table
+        percentile=$(latex_escape "${percentile}")
+        mean=$(latex_escape "${mean}")
+        median=$(latex_escape "${median}")
+
+        # Write the row for this metric to the LaTeX table
+        if [[ -n "${percentile}" && -n "${mean}" && -n "${median}" ]]; then
+            printf "%s & %s & %s & %s \\\\\\\\\\\\\ \n" "${metric}" "${percentile}" "${mean}" "${median}" >> "${aggregation_table}"
+        fi
+    done
+fi
+
+# Close the LaTeX table
+printf "\\hline\n" >> "${aggregation_table}"
+printf "\\\\end{longtable}\n" >> "${aggregation_table}"
+
+printf "Aggregation table generated in %s\n" "${aggregation_table}"
+
 # It's important to make sure the path is absolute, for LaTeX
 t=$(realpath "${TARGET}")
 

diff --git a/tex/report.tex b/tex/report.tex
@@ -32,6 +32,10 @@
 \usepackage{paralist}
 \usepackage{ffcode}
 \usepackage[capitalize]{cleveref}
+\usepackage{amsmath}    % for mathematical symbols
+\usepackage{graphicx}   % for advanced table formatting
+\usepackage{array}      % for better table control
+\usepackage{longtable}  % for tables that can break across pages
 
 \usepackage{silence}
   \WarningFilter{microtype}{Unable to apply patch `footnote'}
@@ -194,11 +198,15 @@ \section{Results}\label{sec:results}
 \begin{itemize}
   \input{$TARGET/temp/list-of-metrics.tex}
 \end{itemize}
+Here is a table of the metrics and their aggregated values:
+\iexec{cat "${TARGET}/temp/aggregation_table.tex" }\unskip{}
+
 
 The dataset was built by
 \iexec{nproc}\unskip{}
 CPUs\iexec{"${LOCAL}/help/tdiff.sh" "$(cat "${TARGET}/start.txt")"}\unskip{}.
 
+
 \section{Limitations}\label{sec:limitations}
 
 As of January 2023, \citet{dohmke2023} reported that GitHub hosts more than