Skip to content

Commit

Permalink
added mean aggregation over each metric
Browse files Browse the repository at this point in the history
  • Loading branch information
jovi1994 committed Dec 4, 2024
1 parent d5b59df commit 01855f8
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 0 deletions.
20 changes: 20 additions & 0 deletions steps/aggregate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,26 @@ printf "\n" >> "${all}"
echo "All $(wc -l "${all}" | xargs) projects aggregated$("${LOCAL}/help/tdiff.sh" "${start}")"
printf "\n"

# Mean aggregation: for every metric that has a CSV in ${TARGET}/data,
# schedule one run of aggregation-functions/mean.sh that writes its result
# into ${TARGET}/data/aggregation.
mkdir -p "${TARGET}/data/aggregation"

# Start from an empty jobs file; it receives one command line per metric.
jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt
rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
touch "${jobs}"

# ${metrics} is deliberately left unquoted so that it word-splits into the
# individual metric names (its definition is outside this section).
for metric in ${metrics}; do
  metric_file="${TARGET}/data/${metric}.csv"

  # Metrics without a CSV are skipped silently.
  if [[ -f "${metric_file}" ]]; then
    sh="${LOCAL}/steps/aggregation-functions/mean.sh"
    output_folder="${TARGET}/data/aggregation"
    # Exactly one placeholder per argument, and every argument is quoted
    # with @Q, so a path containing blanks or shell metacharacters stays a
    # single word in the job line and no trailing blanks are emitted.
    printf "%s %s %s %s\n" "${sh@Q}" "${metric_file@Q}" "${output_folder@Q}" "${metric@Q}" >> "${jobs}"
  fi
done

# NOTE(review): parallel.sh presumably executes every line of the jobs file
# as a shell command (the @Q quoting above suggests so) - confirm.
"${LOCAL}/help/parallel.sh" "${jobs}"
wait

jobs=${TARGET}/temp/jobs/aggregate-join-jobs.txt
rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
Expand Down
52 changes: 52 additions & 0 deletions steps/aggregation-functions/mean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# The MIT License (MIT)
#
# Copyright (c) 2021-2024 Yegor Bugayenko
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set -e
set -o pipefail

# Computes the arithmetic mean of the third CSV column of a metric file and
# stores it, formatted with three decimals, in
# <output_folder>/<metric_name>.mean.csv.
#
# Arguments:
#   $1 - CSV file whose first line is a header; values are read from column 3
#   $2 - folder that receives the result (created when missing)
#   $3 - metric name, used for the output file name and the log message
# Outputs:
#   One summary line on stdout.
metric_file=${1:?usage: mean.sh <metric_file> <output_folder> <metric_name>}
output_folder=${2:?usage: mean.sh <metric_file> <output_folder> <metric_name>}
metric_name=${3:?usage: mean.sh <metric_file> <output_folder> <metric_name>}

mkdir -p "${output_folder}"

# awk turns the data rows into a small bc program ("s += <value>" per row,
# then "s / <count>" with scale=3), so bc is started once instead of once
# per row while the arithmetic stays exact decimal and the quotient is still
# truncated to three digits. Cells that are empty or not plain decimal
# numbers are skipped, because a single one of them would be a bc syntax
# error. When no usable row exists awk prints nothing and bc stays silent.
mean=$(
  awk -F, '
    NR > 1 {
      value = $3
      gsub(/^[ \t]+|[ \t\r]+$/, "", value)
      if (value ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/) {
        print "s += " value
        count++
      }
    }
    END {
      if (count > 0) {
        print "scale = 3"
        print "s / " count
      }
    }
  ' "${metric_file}" | bc
)

output_file="${output_folder}/${metric_name}.mean.csv"

if [[ -n "${mean}" ]]; then
  # bc omits the leading zero (".666"), printf restores it; the C locale
  # keeps "." as the decimal separator whatever the caller's locale is.
  LC_ALL=C printf -v formatted_mean '%0.3f' "${mean}"
  echo "$formatted_mean" > "${output_file}"
  echo "Aggregated mean for ${metric_name}: $formatted_mean"
else
  # Without data the result file is still written, with 0.000, so that
  # consumers always find a value for the metric.
  echo "0.000" > "${output_file}"
  echo "No valid data to aggregate for ${metric_name}"
fi
99 changes: 99 additions & 0 deletions tests/steps/aggregation-functions/test-mean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env bash
# The MIT License (MIT)
#
# Copyright (c) 2021-2024 Yegor Bugayenko
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set -e
set -o pipefail

# Second positional argument: the file that receives the captured output of
# every case below (who supplies it is not visible in this script).
stdout=$2

# Case 1: a single data row; the mean equals that value, printed with
# three decimals.
{
  dir="${TARGET}"
  mkdir -p "${dir}"
  # Remove results of earlier cases or runs so that the checks below can
  # only succeed on freshly written output.
  rm -f "${TARGET}/data/aggregation/LCOM5.mean.csv"
  echo "repo,java_file,LCOM5" > "${dir}/LCOM5.csv"
  echo "kek,src/main/kek,42.0000" >> "${dir}/LCOM5.csv"

  "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5"

  test -e "${TARGET}/data/aggregation/LCOM5.mean.csv"
  mean_value=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv")
  test "$mean_value" = "42.000"
} > "${stdout}" 2>&1
echo "👍🏻 Single metric (LCOM5) mean calculated correctly"


# Case 2: two metrics are aggregated independently into separate files.
{
  dir1="${TARGET}"
  mkdir -p "${dir1}"
  rm -f "${TARGET}/data/aggregation/LCOM5.mean.csv" "${TARGET}/data/aggregation/NHD.mean.csv"
  echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv"
  echo "kek,src/main/kek,42.000" >> "${dir1}/LCOM5.csv"

  echo "repo,java_file,NHD" > "${dir1}/NHD.csv"
  echo "kek,src/main/kek,1000.000" >> "${dir1}/NHD.csv"

  "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5"

  "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/NHD.csv" "${TARGET}/data/aggregation" "NHD"

  test -e "${TARGET}/data/aggregation/LCOM5.mean.csv"
  mean_value_lcom5=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv")
  test "$mean_value_lcom5" = "42.000"

  test -e "${TARGET}/data/aggregation/NHD.mean.csv"
  mean_value_nhd=$(cat "${TARGET}/data/aggregation/NHD.mean.csv")
  test "$mean_value_nhd" = "1000.000"
} > "${stdout}" 2>&1
echo "👍🏻 Multiple metrics (LCOM5, NHD) aggregated correctly"


# Case 3: several rows; (42 + 35 + 50) / 3 is expected as 42.333.
{
  dir1="${TARGET}"
  mkdir -p "${dir1}"
  rm -f "${TARGET}/data/aggregation/LCOM5.mean.csv"
  echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv"
  echo "kek,src/main/kek,42.000" >> "${dir1}/LCOM5.csv"
  echo "kek,src/main/kek,35.000" >> "${dir1}/LCOM5.csv"
  echo "kek,src/main/kek,50.000" >> "${dir1}/LCOM5.csv"

  "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5"

  test -e "${TARGET}/data/aggregation/LCOM5.mean.csv"
  mean_value=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv")
  test "$mean_value" = "42.333"
} > "${stdout}" 2>&1
echo "👍🏻 Mixed metrics aggregated correctly (LCOM5)"

# Case 4: a file that holds only the header line must still produce a
# result file, containing 0.000.
{
  dir="${TARGET}"
  mkdir -p "${dir}"
  rm -f "${TARGET}/data/aggregation/LCOM5.mean.csv"
  echo "repo,java_file,LCOM5" > "${dir}/Empty.java.m.LCOM5"

  "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir}/Empty.java.m.LCOM5" "${TARGET}/data/aggregation" "LCOM5"

  test -e "${TARGET}/data/aggregation/LCOM5.mean.csv"
  mean_value=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv")
  test "$mean_value" = "0.000"
} > "${stdout}" 2>&1
echo "👍🏻 Edge case with no data handled correctly"

0 comments on commit 01855f8

Please sign in to comment.