From 4d58068524ee9bb514119770abdb10cfdf1aa1b4 Mon Sep 17 00:00:00 2001 From: Michael Ernst Date: Sat, 11 May 2024 10:24:53 -0700 Subject: [PATCH] Partial changes from `update-imports` branch --- .github/workflows/check-reproducibility.yml | 2 + Makefile | 2 +- README.md | 5 +- run.sh | 62 ++++++++++++++++--- src/python/replay_merge.py | 36 +++++++++-- src/python/select_from_results.py | 30 +++++++-- .../merge_tools/gitmerge_ort_imports.sh | 26 +++++--- .../gitmerge_ort_imports_ignorespace.sh | 27 +++++--- 8 files changed, 151 insertions(+), 39 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 36f7780179..7a1b33a7c7 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -40,6 +40,8 @@ jobs: - name: Clean caches & workspace run: make clean - run: echo "LOGURU_COLORIZE=NO" >> $GITHUB_ENV + - name: Build + run: cd src/scripts/merge_tools/merging && ./gradlew -q shadowJar - name: make check-merges-reproducibility run: | git config --global user.email "example@example.com" diff --git a/Makefile b/Makefile index 4ee0c98848..07f265c7de 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel using GNU Parallel..." @set -e; \ - tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 50% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --halt now,fail=1 -j 50% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + diff --git a/README.md b/README.md index 3f86f15a61..80c0ff763d 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,5 @@ To run style checking run `make style`. ## Comparing merge algorithms To investigate differences between two mergers: - * edit file `src/python/select_from_results.py` to reflect the differences you are interested in. - * run `src/python/select_from_results.py` to create a .csv database containing only the differences. - * run `src/python/replay_merge.py --merges_csv CSV_FILE --idx INDEX` (maybe add `-test`) for the index of the merge you are interested in. + * run `src/python/select_from_results.py` to produce a CSV file containing only the differences you are interested in. See its [documentation (at top of file)](src/python/select_from_results.py) for how to run it. + * run `src/python/replay_merge.py --idx INDEX` (maybe add `-test`) for the index of the merge you are interested in. diff --git a/run.sh b/run.sh index bd124bcdac..63b5c6d8da 100755 --- a/run.sh +++ b/run.sh @@ -23,6 +23,13 @@ OUT_DIR="results/$RUN_NAME" N_MERGES=$3 CACHE_DIR="${4}" +# Do not run this script on MacOS. +backend=$(uname -s) +if [ "$backend" = "Darwin" ]; then + echo "Error: MacOS is not supported. Please run the script on a Linux machine. This is due to the use of readarray in certain merge tools." + exit 1 +fi + comparator_flags="" no_timing=false only_plotting=false @@ -52,7 +59,15 @@ while [ $# -gt 0 ]; do shift done -PATH=$(pwd)/src/scripts/merge_tools/:$PATH +# Check if src/scripts/merge_tools/merging is present +if [ ! -d src/scripts/merge_tools/merging ]; then + echo "Error: src/scripts/merge_tools/merging is missing. This is a submodule that is required for the script to run." + echo "Please run 'git submodule update --init' to fetch the submodule." + exit 1 +fi + +PATH=$(pwd)/src/scripts/merge_tools:$PATH +PATH=$(pwd)/src/scripts/merge_tools/merging/src/main/sh:$PATH export PATH GIT_CONFIG_GLOBAL=$(pwd)/.gitconfig @@ -72,6 +87,13 @@ if [ -z "${JAVA17_HOME:+isset}" ] ; then echo "JAVA17_HOME is not set"; exit 1; if [ -z "${machine_id:+isset}" ] ; then machine_id=0; fi if [ -z "${num_machines:+isset}" ] ; then num_machines=1; fi +export JAVA_HOME=$JAVA17_HOME +if [ ! -f ./src/scripts/merge_tools/merging/.git ] ; then + git submodule update --init --recursive +fi + +(cd ./src/scripts/merge_tools/merging && ./gradlew shadowJar) + echo "Machine ID: $machine_id" echo "Number of machines: $num_machines" echo "Output directory: $OUT_DIR" @@ -93,6 +115,7 @@ if [ "$only_plotting" = true ]; then exit 0 fi +export JAVA_HOME=$JAVA11_HOME ./gradlew -q assemble -g ../.gradle/ mkdir -p "$OUT_DIR" @@ -126,13 +149,25 @@ java -cp build/libs/astmergeevaluation-all.jar \ "$OUT_DIR/merges" # Sample 5* merges -read -ra merge_comparator_flags <<<"${comparator_flags}" -python3 src/python/merges_sampler.py \ - --repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \ - --merges_path "$OUT_DIR/merges/" \ - --output_dir "$OUT_DIR/merges_sampled/" \ - --n_merges "$((5 * "$N_MERGES"))" \ - "${merge_comparator_flags[@]}" +total_merges=$((5 * N_MERGES)) + +# Ensure comparator_flags is set, but default to an empty array if not +if [[ -n "${comparator_flags}" ]]; then + read -ra merge_comparator_flags <<< "${comparator_flags}" + python3 src/python/merges_sampler.py \ + --repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \ + --merges_path "$OUT_DIR/merges/" \ + --output_dir "$OUT_DIR/merges_sampled/" \ + --n_merges "$total_merges" \ + "${merge_comparator_flags[@]}" +else + echo "Warning: 'comparator_flags' is empty, continuing without additional flags." + python3 src/python/merges_sampler.py \ + --repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \ + --merges_path "$OUT_DIR/merges/" \ + --output_dir "$OUT_DIR/merges_sampled/" \ + --n_merges "$total_merges" +fi python3 src/python/split_repos.py \ --repos_csv "$OUT_DIR/repos_head_passes.csv" \ @@ -165,6 +200,17 @@ if [ "$no_timing" = false ]; then --n_timings 3 \ --cache_dir "$CACHE_DIR" extra_args+=(--timed_merges_path "$OUT_DIR/merges_timed/") + + python3 src/python/latex_output.py \ + --run_name "$RUN_NAME" \ + --merges_path "$OUT_DIR/merges/" \ + --tested_merges_path "$OUT_DIR/merges_tested/" \ + --analyzed_merges_path "$OUT_DIR/merges_analyzed/" \ + --timed_merges_path "$OUT_DIR/merges_timed/" \ + --full_repos_csv "$REPOS_CSV_WITH_HASHES" \ + --repos_head_passes_csv "$OUT_DIR/repos_head_passes.csv" \ + --n_merges "$N_MERGES" \ + --output_dir "$OUT_DIR" fi python3 src/python/latex_output.py \ diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index e55f7484d4..f947b52c86 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -"""Replay merges and their test results""" +"""Replay merges and their test results. + +Typical usage: + replay_merge.py --idx INDEX +where INDEX is, for example, 38-192 . +""" import argparse import os import sys @@ -50,7 +55,7 @@ def delete_workdirs(results_df: pd.DataFrame) -> None: logger.info("Workdirs deleted") -# pylint: disable=too-many-arguments, too-many-locals +# pylint: disable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements def merge_replay( merge_idx: str, repo_slug: str, @@ -95,9 +100,12 @@ def merge_replay( logger.info( f"workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}" ) - answer = input( - f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)" - ) + if delete_workdir: + answer = "y" + else: + answer = input( + f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)" + ) if answer == "y": shutil.rmtree(WORKDIR_DIRECTORY / workdir) else: @@ -276,6 +284,11 @@ def merge_replay( help="Don't check the fingerprint of a merge", action="store_true", ) + parser.add_argument( + "-skip_build", + help="Build the merge tool", + action="store_false", + ) parser.add_argument( "-create_artifacts", help="Create artifacts", @@ -297,6 +310,19 @@ def merge_replay( logger.info("Testing the replay of a merge") if args.create_artifacts: logger.info("Creating artifacts after replaying the merges") + if args.skip_build: + logger.info("Building merge tool") + + os.environ["PATH"] += os.pathsep + os.path.join( + os.getcwd(), "src/scripts/merge_tools/merging/src/main/sh/" + ) + os.environ["PATH"] += os.pathsep + os.path.join( + os.getcwd(), "src/scripts/merge_tools" + ) + os.environ["GIT_CONFIG_GLOBAL"] = os.getcwd() + "/.gitconfig" + if not args.skip_build: + os.system("cd src/scripts/merge_tools/merging && ./gradlew -q shadowJar") + os.system("git submodule update --init") df = pd.read_csv(args.merges_csv, index_col="idx") diff --git a/src/python/select_from_results.py b/src/python/select_from_results.py index fe7ffbf60d..c82e5dbcc0 100755 --- a/src/python/select_from_results.py +++ b/src/python/select_from_results.py @@ -13,9 +13,14 @@ Here are example invocations: select_from_results.py '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")' select_from_results.py '(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")' + +The resulting .csv is useful for manual examination but cannot be passed to +`replay_merge.py` because that requires a .csv file with all tools and all +fingerprints. """ import argparse +import os from os import system import re import tempfile @@ -25,9 +30,9 @@ def columns_in_query(query): """Returns all the identifiers used in the query.""" result = re.findall(r"""(?> "$attributes_file" +git config --local merge.merge-java.name "Merge Java files" +git config --local merge.merge-java.driver 'java-merge-driver.sh "%A" "%O" "%B"' + +git merge --no-edit "$branch2" +retVal=$? + +# report conflicts +if [ "$retVal" -ne 0 ]; then + echo "Conflict" fi -exit 0 +exit "$retVal" diff --git a/src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh b/src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh index fc245a003d..1058e25dc7 100755 --- a/src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh +++ b/src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh @@ -6,15 +6,26 @@ MERGE_SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd -P)" clone_dir=$1 branch1=$2 branch2=$3 -strategy="-s ort -Xignore-space-change" -if "$MERGE_SCRIPTS_DIR"/gitmerge.sh "$clone_dir" "$branch1" "$branch2" "$strategy"; then - exit 0 -fi +strategy="-Xignore-space-change" + +# shellcheck disable=SC2153 # "JAVA17_HOME" is not a misspelling of "JAVA_HOME" +export JAVA_HOME="$JAVA17_HOME" cd "$clone_dir" || exit 1 -if ! "$MERGE_SCRIPTS_DIR"/resolve-import-conflicts; then - echo "Conflict" - exit 1 + +git checkout "$branch1" --force + +attributes_file=".git/info/attributes" +echo "*.java merge=merge-java" >> "$attributes_file" +git config --local merge.merge-java.name "Merge Java files" +git config --local merge.merge-java.driver 'java-merge-driver.sh "%A" "%O" "%B"' + +git merge --no-edit "$strategy" "$branch2" +retVal=$? + +# report conflicts +if [ "$retVal" -ne 0 ]; then + echo "Conflict" fi -exit 0 +exit "$retVal"