Skip to content

Commit

Permalink
Partial changes from update-imports branch
Browse files Browse the repository at this point in the history
  • Loading branch information
mernst committed May 11, 2024
1 parent d5ed25c commit 4d58068
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 39 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/check-reproducibility.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ jobs:
- name: Clean caches & workspace
run: make clean
- run: echo "LOGURU_COLORIZE=NO" >> $GITHUB_ENV
- name: Build
run: cd src/scripts/merge_tools/merging && ./gradlew -q shadowJar
- name: make check-merges-reproducibility
run: |
git config --global user.email "[email protected]"
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ clean-local:
# Replay every merge listed in $(CSV_RESULTS) to check that it is reproducible.
# The pipeline skips the CSV header row (tail -n +2), takes the first
# comma-separated field of each remaining row (the value passed as --idx), and
# hands each one to replay_merge.py through GNU Parallel.
# --halt now,fail=1 stops the remaining jobs as soon as one replay fails, so the
# target fails promptly instead of running the whole set; -j 50% limits the
# number of concurrent jobs to half of the available CPU cores.
check-merges-reproducibility:
	@echo "Running replay_merge for each idx in parallel using GNU Parallel..."
	@set -e; \
	tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --halt now,fail=1 -j 50% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}

protect-repos:
find repos -mindepth 1 -type d -exec chmod a-w {} +
Expand Down
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,5 @@ To run style checking run `make style`.
## Comparing merge algorithms

To investigate differences between two mergers:
* edit file `src/python/select_from_results.py` to reflect the differences you are interested in.
* run `src/python/select_from_results.py` to create a .csv database containing only the differences.
* run `src/python/replay_merge.py --merges_csv CSV_FILE --idx INDEX` (maybe add `-test`) for the index of the merge you are interested in.
* run `src/python/select_from_results.py` to produce a CSV file containing only the differences you are interested in. See its [documentation (at top of file)](src/python/select_from_results.py) for how to run it.
* run `src/python/replay_merge.py --idx INDEX` (optionally with `-test`), where INDEX is the index of the merge you are interested in.
62 changes: 54 additions & 8 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ OUT_DIR="results/$RUN_NAME"
N_MERGES=$3
CACHE_DIR="${4}"

# Do not run this script on MacOS.
backend=$(uname -s)
if [ "$backend" = "Darwin" ]; then
echo "Error: MacOS is not supported. Please run the script on a Linux machine. This is due to the use of readarray in certain merge tools."
exit 1
fi

comparator_flags=""
no_timing=false
only_plotting=false
Expand Down Expand Up @@ -52,7 +59,15 @@ while [ $# -gt 0 ]; do
shift
done

PATH=$(pwd)/src/scripts/merge_tools/:$PATH
# Check if src/scripts/merge_tools/merging is present
if [ ! -d src/scripts/merge_tools/merging ]; then
echo "Error: src/scripts/merge_tools/merging is missing. This is a submodule that is required for the script to run."
echo "Please run 'git submodule update --init' to fetch the submodule."
exit 1
fi

PATH=$(pwd)/src/scripts/merge_tools:$PATH
PATH=$(pwd)/src/scripts/merge_tools/merging/src/main/sh:$PATH
export PATH

GIT_CONFIG_GLOBAL=$(pwd)/.gitconfig
Expand All @@ -72,6 +87,13 @@ if [ -z "${JAVA17_HOME:+isset}" ] ; then echo "JAVA17_HOME is not set"; exit 1;
if [ -z "${machine_id:+isset}" ] ; then machine_id=0; fi
if [ -z "${num_machines:+isset}" ] ; then num_machines=1; fi

export JAVA_HOME=$JAVA17_HOME
if [ ! -f ./src/scripts/merge_tools/merging/.git ] ; then
git submodule update --init --recursive
fi

(cd ./src/scripts/merge_tools/merging && ./gradlew shadowJar)

echo "Machine ID: $machine_id"
echo "Number of machines: $num_machines"
echo "Output directory: $OUT_DIR"
Expand All @@ -93,6 +115,7 @@ if [ "$only_plotting" = true ]; then
exit 0
fi

export JAVA_HOME=$JAVA11_HOME
./gradlew -q assemble -g ../.gradle/

mkdir -p "$OUT_DIR"
Expand Down Expand Up @@ -126,13 +149,25 @@ java -cp build/libs/astmergeevaluation-all.jar \
"$OUT_DIR/merges"

# Sample 5*<n_merges> merges
read -ra merge_comparator_flags <<<"${comparator_flags}"
python3 src/python/merges_sampler.py \
--repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \
--merges_path "$OUT_DIR/merges/" \
--output_dir "$OUT_DIR/merges_sampled/" \
--n_merges "$((5 * "$N_MERGES"))" \
"${merge_comparator_flags[@]}"
total_merges=$((5 * N_MERGES))

# Pass comparator_flags to the sampler only when it is non-empty; otherwise run the sampler without extra flags.
if [[ -n "${comparator_flags}" ]]; then
read -ra merge_comparator_flags <<< "${comparator_flags}"
python3 src/python/merges_sampler.py \
--repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \
--merges_path "$OUT_DIR/merges/" \
--output_dir "$OUT_DIR/merges_sampled/" \
--n_merges "$total_merges" \
"${merge_comparator_flags[@]}"
else
echo "Warning: 'comparator_flags' is empty, continuing without additional flags."
python3 src/python/merges_sampler.py \
--repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \
--merges_path "$OUT_DIR/merges/" \
--output_dir "$OUT_DIR/merges_sampled/" \
--n_merges "$total_merges"
fi

python3 src/python/split_repos.py \
--repos_csv "$OUT_DIR/repos_head_passes.csv" \
Expand Down Expand Up @@ -165,6 +200,17 @@ if [ "$no_timing" = false ]; then
--n_timings 3 \
--cache_dir "$CACHE_DIR"
extra_args+=(--timed_merges_path "$OUT_DIR/merges_timed/")

python3 src/python/latex_output.py \
--run_name "$RUN_NAME" \
--merges_path "$OUT_DIR/merges/" \
--tested_merges_path "$OUT_DIR/merges_tested/" \
--analyzed_merges_path "$OUT_DIR/merges_analyzed/" \
--timed_merges_path "$OUT_DIR/merges_timed/" \
--full_repos_csv "$REPOS_CSV_WITH_HASHES" \
--repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \
--n_merges "$N_MERGES" \
--output_dir "$OUT_DIR"
fi

python3 src/python/latex_output.py \
Expand Down
36 changes: 31 additions & 5 deletions src/python/replay_merge.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Replay merges and their test results"""
"""Replay merges and their test results.
Typical usage:
replay_merge.py --idx INDEX
where INDEX is, for example, 38-192.
"""
import argparse
import os
import sys
Expand Down Expand Up @@ -50,7 +55,7 @@ def delete_workdirs(results_df: pd.DataFrame) -> None:
logger.info("Workdirs deleted")


# pylint: disable=too-many-arguments, too-many-locals
# pylint: disable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements
def merge_replay(
merge_idx: str,
repo_slug: str,
Expand Down Expand Up @@ -95,9 +100,12 @@ def merge_replay(
logger.info(
f"workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}"
)
answer = input(
f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)"
)
if delete_workdir:
answer = "y"
else:
answer = input(
f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)"
)
if answer == "y":
shutil.rmtree(WORKDIR_DIRECTORY / workdir)
else:
Expand Down Expand Up @@ -276,6 +284,11 @@ def merge_replay(
help="Don't check the fingerprint of a merge",
action="store_true",
)
parser.add_argument(
"-skip_build",
help="Build the merge tool",
action="store_false",
)
parser.add_argument(
"-create_artifacts",
help="Create artifacts",
Expand All @@ -297,6 +310,19 @@ def merge_replay(
logger.info("Testing the replay of a merge")
if args.create_artifacts:
logger.info("Creating artifacts after replaying the merges")
if args.skip_build:
logger.info("Building merge tool")

os.environ["PATH"] += os.pathsep + os.path.join(
os.getcwd(), "src/scripts/merge_tools/merging/src/main/sh/"
)
os.environ["PATH"] += os.pathsep + os.path.join(
os.getcwd(), "src/scripts/merge_tools"
)
os.environ["GIT_CONFIG_GLOBAL"] = os.getcwd() + "/.gitconfig"
if not args.skip_build:
os.system("cd src/scripts/merge_tools/merging && ./gradlew -q shadowJar")
os.system("git submodule update --init")

df = pd.read_csv(args.merges_csv, index_col="idx")

Expand Down
30 changes: 24 additions & 6 deletions src/python/select_from_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
Here are example invocations:
select_from_results.py '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")'
select_from_results.py '(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")'
The resulting .csv is useful for manual examination but cannot be passed to
`replay_merge.py` because that requires a .csv file with all tools and all
fingerprints.
"""

import argparse
import os
from os import system
import re
import tempfile
Expand All @@ -25,9 +30,9 @@
def columns_in_query(query):
    """Returns all the identifiers used in the query.

    An identifier is a word made of ASCII letters and underscores that starts
    with a letter and is neither immediately preceded nor immediately followed
    by a quote character, so a single-word string literal such as
    "Merge_failed" is not reported.  The boolean operators `and` and `or` are
    dropped, however many times they occur.

    Args:
        query: the query expression, e.g.
            '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")'.

    Returns:
        The identifiers in order of appearance; duplicates are kept.
    """
    words = re.findall(r"""(?<!['"])\b[A-Za-z][A-Za-z_]*\b(?!['"])""", query)
    # Filter out every occurrence of the operators, not just the first one.
    return [word for word in words if word not in ("and", "or")]

Expand All @@ -44,8 +49,11 @@ def main():
description="Outputs a subset of the results, to standard out",
)
parser.add_argument("query")
scriptdir = os.path.dirname(os.path.realpath(__file__))
parser.add_argument(
"--input", action="store", default="../../results/combined/result.csv"
"--input",
action="store",
default=scriptdir + "/" + "../../results/combined/result.csv",
)
parser.add_argument("columns", nargs=argparse.REMAINDER)
args = parser.parse_args()
Expand All @@ -57,16 +65,26 @@ def main():

# Select some columns
columns_to_select = (
["idx", "repo-idx", "merge-idx", "branch_name", "merge", "left", "right"]
[
"idx",
"repo-idx",
"merge-idx",
"branch_name",
"merge",
"left",
"left_tree_fingerprint",
"right",
"right_tree_fingerprint",
]
+ columns_in_query(args.query)
+ args.columns
+ ["repository"]
)
df = df[columns_to_select]

# Gross way to produce output to standard out
with tempfile.TemporaryFile() as tmpfile:
with tempfile.NamedTemporaryFile() as tmpfile:
df.to_csv(tmpfile)
print(tmpfile.name)
system("cat " + tmpfile.name)


Expand Down
26 changes: 18 additions & 8 deletions src/scripts/merge_tools/gitmerge_ort_imports.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,25 @@ MERGE_SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd -P)"
clone_dir=$1
branch1=$2
branch2=$3
strategy="-s ort"
if "$MERGE_SCRIPTS_DIR"/gitmerge.sh "$clone_dir" "$branch1" "$branch2" "$strategy"; then
exit 0
fi

# shellcheck disable=SC2153 # "JAVA17_HOME" is not a misspelling of "JAVA_HOME"
export JAVA_HOME="$JAVA17_HOME"

cd "$clone_dir" || exit 1
if ! "$MERGE_SCRIPTS_DIR"/resolve-import-conflicts; then
echo "Conflict"
exit 1

git checkout "$branch1" --force

attributes_file=".git/info/attributes"
echo "*.java merge=merge-java" >> "$attributes_file"
git config --local merge.merge-java.name "Merge Java files"
git config --local merge.merge-java.driver 'java-merge-driver.sh "%A" "%O" "%B"'

git merge --no-edit "$branch2"
retVal=$?

# report conflicts
if [ "$retVal" -ne 0 ]; then
echo "Conflict"
fi

exit 0
exit "$retVal"
27 changes: 19 additions & 8 deletions src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,26 @@ MERGE_SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd -P)"
clone_dir=$1
branch1=$2
branch2=$3
strategy="-s ort -Xignore-space-change"
if "$MERGE_SCRIPTS_DIR"/gitmerge.sh "$clone_dir" "$branch1" "$branch2" "$strategy"; then
exit 0
fi
strategy="-Xignore-space-change"

# shellcheck disable=SC2153 # "JAVA17_HOME" is not a misspelling of "JAVA_HOME"
export JAVA_HOME="$JAVA17_HOME"

cd "$clone_dir" || exit 1
if ! "$MERGE_SCRIPTS_DIR"/resolve-import-conflicts; then
echo "Conflict"
exit 1

git checkout "$branch1" --force

attributes_file=".git/info/attributes"
echo "*.java merge=merge-java" >> "$attributes_file"
git config --local merge.merge-java.name "Merge Java files"
git config --local merge.merge-java.driver 'java-merge-driver.sh "%A" "%O" "%B"'

git merge --no-edit "$strategy" "$branch2"
retVal=$?

# report conflicts
if [ "$retVal" -ne 0 ]; then
echo "Conflict"
fi

exit 0
exit "$retVal"

0 comments on commit 4d58068

Please sign in to comment.