From b6762d943d8dee67056222a0ec3922dd21eb98e7 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 21:40:19 -0700 Subject: [PATCH 01/46] Added CI/CD file --- .github/workflows/check-reproducibility.yml | 50 +++++++++++++++++++++ src/python/replay_merge.py | 23 +++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/check-reproducibility.yml diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml new file mode 100644 index 0000000000..bcd32af277 --- /dev/null +++ b/.github/workflows/check-reproducibility.yml @@ -0,0 +1,50 @@ +name: Run Small test +on: [push, pull_request] +jobs: + test: + strategy: + matrix: + maven: [ '3.9.2' ] + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: 17 + - run: echo "JAVA17_HOME=$JAVA_HOME" >> $GITHUB_ENV + - run: java -version + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v3 + with: + python-version: 3.12 + auto-update-conda: true + mamba-version: "*" + channels: conda-forge,defaults + activate-environment: AST + environment-file: environment.yml + - name: Install maven + uses: s4u/setup-maven-action@v1.8.0 + with: + java-version: 17 + maven-version: ${{ matrix.maven }} + - name: Clean caches & workspace + run: make clean + - run: echo "LOGURU_COLORIZE=NO" >> $GITHUB_ENV + - name: Run small test + run: | + git config --global user.email "example@example.com" + git config --global user.name "Example Example" + head -n 101 results/combined/result.csv > results/combined/result_trimmed.csv + make check-merges-reproducibilit + env: + GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 3aa3561cbc..c7352e44b3 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -3,6 +3,7 @@ """Replay merges and their test results""" import argparse import os +import sys from pathlib import Path import shutil import pandas as pd @@ -75,13 +76,21 @@ def merge_replay( f"workdir {WORKDIR_DIRECTORY/workdir} already exists. Skipping" ) continue - - repo = Repository( - repo_slug, - cache_directory=Path("no_cache/"), - workdir_id=workdir, - delete_workdir=delete_workdir, - ) + try: + repo = Repository( + repo_slug, + cache_directory=Path("no_cache/"), + workdir_id=workdir, + delete_workdir=delete_workdir, + lazy_clone=False, + ) + except Exception as e: + logger.error( + f"Git clone failed for {repo_slug} {merge_data['left']}" + + f"{merge_data['right']} {e}" + ) + # Exit with 0 for CI/CD to not cause problems in case a repo is no longer available + sys.exit(0) ( merge_result, merge_fingerprint, From b578a2139d903d8d703f2cf1298eac6e8216c3c5 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 21:43:54 -0700 Subject: [PATCH 02/46] Correct typo --- .github/workflows/check-reproducibility.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index bcd32af277..82092e9d52 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -1,4 +1,4 @@ -name: Run Small test +name: Run Reproducibility Check on: [push, pull_request] jobs: test: @@ -45,6 +45,6 @@ jobs: git config --global user.email "example@example.com" git config --global user.name "Example Example" head -n 101 results/combined/result.csv > results/combined/result_trimmed.csv - make check-merges-reproducibilit + make check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} From faf5837f72dac1e9fabcd6dc5432afb398ed9414 Mon Sep 17 00:00:00 2001 From: Michael Ernst Date: Sat, 4 May 2024 22:09:37 -0700 Subject: [PATCH 03/46] Fix name --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 82092e9d52..5d797b3eff 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -40,7 +40,7 @@ jobs: - name: Clean caches & workspace run: make clean - run: echo "LOGURU_COLORIZE=NO" >> $GITHUB_ENV - - name: Run small test + - name: make check-merges-reproducibility run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" From ff378ed29b6b3938aea9018399d0918414c395c4 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 22:40:37 -0700 Subject: [PATCH 04/46] Safe cloning --- run.sh | 5 ++--- src/python/repo.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/run.sh b/run.sh index bf66245c7a..f499439714 100755 --- a/run.sh +++ b/run.sh @@ -108,9 +108,8 @@ fi mkdir -p "$OUT_DIR" # Delete all locks in cache -if [ -d "$CACHE_DIR" ]; then - find "$CACHE_DIR" -name "*.lock" -delete -fi +find "$CACHE_DIR" -name "*.lock" -delete +find "repos" -name "*.lock" -delete # Delete .workdir rm -rf .workdir diff --git a/src/python/repo.py b/src/python/repo.py index 28456c50a2..176e4b67a4 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -23,6 +23,7 @@ set_in_cache, lookup_in_cache, ) +import fasteners import git.repo from variables import ( REPOS_PATH, @@ -209,21 +210,16 @@ def __init__( # pylint: disable=too-many-arguments def clone_repo(self) -> None: """Clones the repository.""" - if self.repo_path.exists(): - return - print( - "Cloning", - self.repo_slug, - "to", - self.repo_path, - "because:", - self.repo_path.exists(), - ) - try: - clone_repo(self.repo_slug, self.repo_path) - except Exception as e: - logger.error("Exception during cloning:\n", e) - raise + lock_path = REPOS_PATH / "locks" / self.repo_slug + lock = fasteners.InterProcessLock(lock_path) + with lock: + if self.repo_path.exists(): + return + try: + clone_repo(self.repo_slug, self.repo_path) + except Exception as e: + logger.error("Exception during cloning:\n", e) + raise if not self.repo_path.exists(): logger.error( f"Repo {self.repo_slug} does not exist after cloning {self.repo_path}" From 27fc76e3b55bb0b753efc13e5e5197dc78984dd5 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 22:46:00 -0700 Subject: [PATCH 05/46] Fix --- run.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/run.sh b/run.sh index f499439714..fa5dd67d31 100755 --- a/run.sh +++ b/run.sh @@ -107,9 +107,13 @@ fi mkdir -p "$OUT_DIR" -# Delete all locks in cache -find "$CACHE_DIR" -name "*.lock" -delete -find "repos" -name "*.lock" -delete +# Delete all locks +if [ -d "$CACHE_DIR" ]; then + find "$CACHE_DIR" -name "*.lock" -delete +fi +if [ -d "repos" ]; then + find "repos/locks" -name "*.lock" -delete +fi # Delete .workdir rm -rf .workdir From 72064da8c7cc847b622c76bcd58de0f1b813fa39 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 22:54:03 -0700 Subject: [PATCH 06/46] More verbose --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f9808426d..fbd06df679 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -u --halt now,fail=1 -j 0 'python3 src/python/replay_merge.py -delete_workdir --idx {}' + @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 0 'python3 src/python/replay_merge.py -delete_workdir --idx {}' protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From cdc9a668ebf535ee0d833698be5bffead2567eeb Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 23:09:43 -0700 Subject: [PATCH 07/46] Fail fast to false --- .github/workflows/check-reproducibility.yml | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 5d797b3eff..dc7b108307 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -3,6 +3,7 @@ on: [push, pull_request] jobs: test: strategy: + fail-fast: false matrix: maven: [ '3.9.2' ] runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index fbd06df679..bfe6bb8ca8 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 0 'python3 src/python/replay_merge.py -delete_workdir --idx {}' + @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 0 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 6dfd006bf28b98643844af701d215f8e5819b0a0 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 23:11:09 -0700 Subject: [PATCH 08/46] Upgrade java --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index dc7b108307..849cf9f4ce 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -11,7 +11,7 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/setup-java@v3 + - uses: actions/setup-java@v4 with: distribution: 'zulu' java-version: 17 From 651ffb0cc6851d4dfbccb295dc7f853112086d19 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 23:14:53 -0700 Subject: [PATCH 09/46] Upgrade maven --- .github/workflows/check-reproducibility.yml | 2 +- .github/workflows/small-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 849cf9f4ce..529ae52378 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -34,7 +34,7 @@ jobs: activate-environment: AST environment-file: environment.yml - name: Install maven - uses: s4u/setup-maven-action@v1.8.0 + uses: s4u/setup-maven-action@v1.12.0 with: java-version: 17 maven-version: ${{ matrix.maven }} diff --git a/.github/workflows/small-test.yml b/.github/workflows/small-test.yml index 3300f54ee5..868b2c2290 100644 --- a/.github/workflows/small-test.yml +++ b/.github/workflows/small-test.yml @@ -47,7 +47,7 @@ jobs: - name: Install PdfLaTeX run: sudo apt update && sudo apt install texlive-latex-extra -y - name: Install maven - uses: s4u/setup-maven-action@v1.8.0 + uses: s4u/setup-maven-action@v1.12.0 with: java-version: 17 maven-version: ${{ matrix.maven }} From 16d706114063f5c47b1a9db1474516c9eb779ffd Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 23:15:49 -0700 Subject: [PATCH 10/46] Set jobs to 2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bfe6bb8ca8..2c0cc1d25e 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 0 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' + @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 2 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 21ea90af3d5c83bb8bc5f3f9f96d5e9377d69085 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sat, 4 May 2024 23:26:30 -0700 Subject: [PATCH 11/46] Use 1 process --- .github/workflows/check-reproducibility.yml | 2 +- Makefile | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 529ae52378..5e153c6963 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -46,6 +46,6 @@ jobs: git config --global user.email "example@example.com" git config --global user.name "Example Example" head -n 101 results/combined/result.csv > results/combined/result_trimmed.csv - make check-merges-reproducibility + make NUM_PROCESSES=1 check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} diff --git a/Makefile b/Makefile index 2c0cc1d25e..f2ae84eb7d 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,8 @@ CSV_RESULTS_GREATEST_HITS = results/greatest_hits/result.csv CSV_RESULTS_REAPER = results/reaper/result.csv CSV_RESULTS = $(CSV_RESULTS_COMBINED) +NUM_PROCESSES = 0 + shell-script-style: shellcheck -e SC2153 -x -P SCRIPTDIR --format=gcc ${SH_SCRIPTS} ${BASH_SCRIPTS} checkbashisms ${SH_SCRIPTS} @@ -131,7 +133,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j 2 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' + @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j $(NUM_PROCESSES) 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 13ba984d7aa4ba7e1ceebf0898fbfd64a5fd1053 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 11:55:14 -0700 Subject: [PATCH 12/46] Seqeuntial processing --- .github/workflows/check-reproducibility.yml | 1 - Makefile | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 5e153c6963..61b7ab41e2 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -3,7 +3,6 @@ on: [push, pull_request] jobs: test: strategy: - fail-fast: false matrix: maven: [ '3.9.2' ] runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index f2ae84eb7d..7a37fd6b48 100644 --- a/Makefile +++ b/Makefile @@ -133,8 +133,9 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel --progress --bar -u --halt now,fail=1 -j $(NUM_PROCESSES) 'python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {}' - + @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ + python3 src/python/replay_merge.py --merges_csv $$(CSV_RESULTS) --delete_workdir --idx $$idx; \ + done protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 757f3ab3e8517c1b4bd9abafd2daa6bff8652bea Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 11:55:56 -0700 Subject: [PATCH 13/46] Seqeuntial processing --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7a37fd6b48..29afeb9505 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ - python3 src/python/replay_merge.py --merges_csv $$(CSV_RESULTS) --delete_workdir --idx $$idx; \ + python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx $$idx; \ done protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 264e10480b2f19533102ef1cce796cb3cd89e518 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 12:28:15 -0700 Subject: [PATCH 14/46] Crash script when one run fails --- Makefile | 6 ++++-- src/python/replay_merge.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 29afeb9505..ece2e8d9a4 100644 --- a/Makefile +++ b/Makefile @@ -133,9 +133,11 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." - @tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ - python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx $$idx; \ + @set -e; \ + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ + python3 src/python/replay_merge.py --merges_csv $$(CSV_RESULTS) --delete_workdir --idx $$idx; \ done + protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index c7352e44b3..6c5ce28053 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -20,6 +20,7 @@ from loguru import logger logger.add("replay_merge.log", mode="a") +logger.add(sys.stdout, colorize=True) # pylint: disable=too-many-arguments, too-many-locals From db6a44be2669ec90c98851ca3a9953d0b98ce72a Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 12:31:17 -0700 Subject: [PATCH 15/46] Crash script when one run fails --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ece2e8d9a4..afa72fe7d2 100644 --- a/Makefile +++ b/Makefile @@ -135,7 +135,7 @@ check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel..." @set -e; \ tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ - python3 src/python/replay_merge.py --merges_csv $$(CSV_RESULTS) --delete_workdir --idx $$idx; \ + python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx $$idx; \ done protect-repos: From 4d6c8ac743d6b21b6210e2fe3d6e69fcec925038 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 12:50:11 -0700 Subject: [PATCH 16/46] More info --- src/python/replay_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 6c5ce28053..a4a815d863 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -137,7 +137,8 @@ def merge_replay( raise Exception( f"fingerprints differ: after merge of {workdir} with {merge_tool}, found" + f" {merge_fingerprint} but expected " - + f"{merge_data[f'{merge_tool.name}_merge_fingerprint']}" + + f"{merge_data[f'{merge_tool.name}_merge_fingerprint']} at log path {log_path}" + + f" and repo path {repo.local_repo_path}" ) if merge_result not in ( From 1e00bc4cf47cdd887e622fbcf2048646b3f715a4 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 13:27:43 -0700 Subject: [PATCH 17/46] Print merge file --- src/python/replay_merge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index a4a815d863..801da968b7 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -134,6 +134,12 @@ def merge_replay( merge_data[f"{merge_tool.name}_merge_fingerprint"] != merge_fingerprint and not dont_check_fingerprints ): + # Print the merge log file + print("=====================================") + with open(log_path, "r", encoding="utf-8") as f: + print(f.read()) + print("=====================================") + raise Exception( f"fingerprints differ: after merge of {workdir} with {merge_tool}, found" + f" {merge_fingerprint} but expected " From 14ea707a70cc6532f4e1c85c9afdc7972cabcb0d Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 13:34:46 -0700 Subject: [PATCH 18/46] Added src/scripts/merge_tools to the path --- src/python/replay_merge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 801da968b7..6db69978c7 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -233,6 +233,9 @@ def merge_replay( ) args = parser.parse_args() + # Add 'src/scripts/merge_tools' to the path + sys.path.append("src/scripts/merge_tools") + logger.info(f"Replaying merge with index {args.idx}") if args.delete_workdir: logger.info("Deleting workdir after replaying the merge") From 2ec5436031872634f061956f7687aba184330220 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 13:41:01 -0700 Subject: [PATCH 19/46] Added src/scripts/merge_tools to the path --- src/python/replay_merge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 6db69978c7..8381d4a5c6 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -233,8 +233,9 @@ def merge_replay( ) args = parser.parse_args() - # Add 'src/scripts/merge_tools' to the path - sys.path.append("src/scripts/merge_tools") + os.environ["PATH"] += os.pathsep + os.path.join( + os.getcwd(), "src/scripts/merge_tools" + ) logger.info(f"Replaying merge with index {args.idx}") if args.delete_workdir: From 158e5bf20f027c7d2c25df810978978f39d89d43 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 13:44:18 -0700 Subject: [PATCH 20/46] Print path --- src/scripts/merge_tools/git_hires_merge.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/scripts/merge_tools/git_hires_merge.sh b/src/scripts/merge_tools/git_hires_merge.sh index bc2d1ba275..90ae509345 100755 --- a/src/scripts/merge_tools/git_hires_merge.sh +++ b/src/scripts/merge_tools/git_hires_merge.sh @@ -6,6 +6,9 @@ clone_dir=$1 branch1=$2 branch2=$3 +# Print the current PATH +echo "PATH: $PATH" + cd "$clone_dir" || exit 1 git checkout "$branch1" --force From 65e50ebebbedd36023b3833b03e59db2ea39ea52 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 14:35:05 -0700 Subject: [PATCH 21/46] Update --- .github/workflows/check-reproducibility.yml | 2 +- .github/workflows/small-test.yml | 2 +- src/python/replay_merge.py | 93 ++++++++++++++++----- 3 files changed, 76 insertions(+), 21 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 61b7ab41e2..109ece84d8 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -1,4 +1,4 @@ -name: Run Reproducibility Check +name: Reproducibility Check on: [push, pull_request] jobs: test: diff --git a/.github/workflows/small-test.yml b/.github/workflows/small-test.yml index 868b2c2290..3b3acc8ea7 100644 --- a/.github/workflows/small-test.yml +++ b/.github/workflows/small-test.yml @@ -1,4 +1,4 @@ -name: Run Small test +name: Small test on: [push, pull_request] jobs: test: diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 8381d4a5c6..a1d5c2042f 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -23,6 +23,63 @@ logger.add(sys.stdout, colorize=True) +def store_artifacts(result_df: pd.DataFrame) -> None: + """Store artifacts in a tarball with specific directory.""" + # Create temporary directories for the structured archive + base_dir = "archive" + if not os.path.exists(os.path.join(base_dir, "merge_replays")): + os.makedirs(os.path.join(base_dir, "merge_replays")) + if not os.path.exists(os.path.join(base_dir, "logs")): + os.makedirs(os.path.join(base_dir, "logs")) + + # Copy files to the new directory structure + for idx in result_df.index: + repo_path = result_df.loc[idx, "repo path"] + log_path = result_df.loc[idx, "merge log path"] + + # Extract one level higher than the basename + repo_subdir = os.path.join( + *str(repo_path).split(os.sep)[-2:] + ) # Last two components of the path + log_subdir = os.path.basename(log_path) # Just the file name + + # Full new path creation + new_repo_path = os.path.join(base_dir, "merge_replays", repo_subdir) + new_log_path = os.path.join(base_dir, "logs", log_subdir) + + # Ensure directories exist + os.makedirs(os.path.dirname(new_repo_path), exist_ok=True) + + # Copy repository directories or files + if os.path.isdir(repo_path): + shutil.copytree(repo_path, new_repo_path) + else: + shutil.copy(repo_path, new_repo_path) + + # Copy log files + shutil.copy(log_path, new_log_path) + + # Create the tarball from the new directory structure + os.chdir( + base_dir + ) # Change directory to avoid including the 'archive/' prefix in the tarball + os.system("tar -czf ../replay_merge_artifacts.tar.gz merge_replays logs") + os.chdir("..") # Change back to the original directory + + # Clean up the temporary directory + shutil.rmtree(base_dir) + + logger.info("Artifacts created") + + +def delete_workdirs(results_df: pd.DataFrame) -> None: + """Delete the workdirs after replaying the merges.""" + for idx in results_df.index: + os.system("chmod -R 777 " + str(results_df.loc[idx, "repo path"])) + shutil.rmtree(results_df.loc[idx, "repo path"]) + logger.info("Workdirs deleted") + + # pylint: disable=too-many-arguments, too-many-locals def merge_replay( merge_idx: str, @@ -30,6 +87,7 @@ def merge_replay( merge_data: pd.Series, test_merge: bool = False, delete_workdir: bool = True, + create_artifacts: bool = False, dont_check_fingerprints: bool = False, ) -> pd.DataFrame: """Replay a merge and its test results. @@ -82,7 +140,7 @@ def merge_replay( repo_slug, cache_directory=Path("no_cache/"), workdir_id=workdir, - delete_workdir=delete_workdir, + delete_workdir=False, lazy_clone=False, ) except Exception as e: @@ -106,6 +164,7 @@ def merge_replay( timeout=TIMEOUT_TESTING_MERGE, use_cache=False, ) + assert repo.local_repo_path.exists() root_dir = Path("replay_logs") log_path = root_dir / Path( "merges/" @@ -121,6 +180,7 @@ def merge_replay( log_path.parent.mkdir(parents=True, exist_ok=True) with open(log_path, "w", encoding="utf-8") as f: f.write(explanation) + assert repo.local_repo_path.exists() result_df.loc[ merge_tool.name, ["merge result", "merge log path", "repo path", "merge fingerprint"], @@ -130,16 +190,20 @@ def merge_replay( repo.local_repo_path, merge_fingerprint, ] + assert repo.local_repo_path.exists() if ( merge_data[f"{merge_tool.name}_merge_fingerprint"] != merge_fingerprint and not dont_check_fingerprints ): - # Print the merge log file - print("=====================================") + assert repo.local_repo_path.exists() + if create_artifacts: + store_artifacts(result_df) + if delete_workdir: + delete_workdirs(result_df) + print("=====================================\n") with open(log_path, "r", encoding="utf-8") as f: print(f.read()) - print("=====================================") - + print("=====================================\n") raise Exception( f"fingerprints differ: after merge of {workdir} with {merge_tool}, found" + f" {merge_fingerprint} but expected " @@ -256,7 +320,8 @@ def merge_replay( str(repo_slug), merge_data, args.test, - args.delete_workdir and not args.create_artifacts, + args.delete_workdir, + args.create_artifacts, args.dont_check_fingerprints, ) for idx, row in results_df.iterrows(): @@ -275,16 +340,6 @@ def merge_replay( # Create artifacts which means creating a tarball of all the relevant workdirs if args.create_artifacts: - logger.info("Creating artifacts") - os.system( - "tar -czf replay_merge_artifacts.tar.gz " - + " ".join( - [str(results_df.loc[idx, "repo path"]) for idx in results_df.index] - ) - ) - logger.info("Artifacts created") - if args.delete_workdir: - for idx in results_df.index: - os.system("chmod -R 777 " + str(results_df.loc[idx, "repo path"])) - shutil.rmtree(results_df.loc[idx, "repo path"]) - logger.info("Workdirs deleted") + store_artifacts(results_df) + if args.delete_workdir: + delete_workdirs(results_df) From c62cba45ea7920352bead1a5be85d48e0876bdaf Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 14:53:32 -0700 Subject: [PATCH 22/46] Ignore failed merges --- src/python/replay_merge.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index a1d5c2042f..40acac2f70 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -191,7 +191,11 @@ def merge_replay( merge_fingerprint, ] assert repo.local_repo_path.exists() - if ( + if merge_result not in ( + MERGE_STATE.Merge_failed, + MERGE_STATE.Git_checkout_failed, + TEST_STATE.Git_checkout_failed, + ) and ( merge_data[f"{merge_tool.name}_merge_fingerprint"] != merge_fingerprint and not dont_check_fingerprints ): From dd98cad20e0a8244895af18c78f6bf3ad2247b73 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 14:55:19 -0700 Subject: [PATCH 23/46] Better exception --- src/python/replay_merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 40acac2f70..88b59c2a1a 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -212,7 +212,9 @@ def merge_replay( f"fingerprints differ: after merge of {workdir} with {merge_tool}, found" + f" {merge_fingerprint} but expected " + f"{merge_data[f'{merge_tool.name}_merge_fingerprint']} at log path {log_path}" - + f" and repo path {repo.local_repo_path}" + + f" and repo path {repo.local_repo_path}", + merge_result, + f"idx {merge_idx}", ) if merge_result not in ( From 95beba77a5d48ff5d9ea78af9732537e8e37fc40 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 16:38:49 -0700 Subject: [PATCH 24/46] Trim the head --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 109ece84d8..fb31f1560f 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,7 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - head -n 101 results/combined/result.csv > results/combined/result_trimmed.csv + head -n 101 results/combined/result.csv > results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} From fe916f9ca82b5269531f1ec9c76c9282e680cecc Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Sun, 5 May 2024 16:52:51 -0700 Subject: [PATCH 25/46] Correct frame triming --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index fb31f1560f..e2df8f9f16 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,7 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - head -n 101 results/combined/result.csv > results/combined/result.csv + head -n 101 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} From 216294ede22b26fa191950b213d1714a050fcc1c Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 16:26:49 -0700 Subject: [PATCH 26/46] Add more merges --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index e2df8f9f16..222ff81ada 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,7 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - head -n 101 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv + head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} From 09139fc988ef6f6f7dea2cfe4b2b41991a8ab2b7 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 16:38:55 -0700 Subject: [PATCH 27/46] Make sure config files are empty --- blanck_git_config.config | 0 run.sh | 9 +++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 blanck_git_config.config diff --git a/blanck_git_config.config b/blanck_git_config.config new file mode 100644 index 0000000000..e69de29bb2 diff --git a/run.sh b/run.sh index fa5dd67d31..994a84e45d 100755 --- a/run.sh +++ b/run.sh @@ -55,8 +55,13 @@ done PATH=$(pwd)/src/scripts/merge_tools/:$PATH export PATH -echo "Checking for custom merge drivers in global configuration..." -merge_drivers=$(git config --global --get-regexp '^merge\..*\.driver$' || echo "No merge drivers set") +export GIT_CONFIG_GLOBAL=$(pwd)/blanck_git_config.config +if git config --list --show-origin | grep 'file:'"$GIT_CONFIG_GLOBAL" > /dev/null; then + echo "Error: Global config is not empty" + exit 1 +else + echo "Global config is empty" +fi if [ "$merge_drivers" == "No merge drivers set" ]; then echo "No custom merge drivers found in global configuration. Proceeding with the evaluation." From a7652be58cad812463faacdeaff7436668528288 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 17:57:48 -0700 Subject: [PATCH 28/46] Add fake merge driver --- .github/workflows/small-test.yml | 2 ++ run.sh | 10 ---------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/small-test.yml b/.github/workflows/small-test.yml index 3b3acc8ea7..c4e1f50f11 100644 --- a/.github/workflows/small-test.yml +++ b/.github/workflows/small-test.yml @@ -58,6 +58,8 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" + git config --global merge.customMerge.name "Always incorrect custom merge driver" + git config --global merge.customMerge.driver 'fake-merge-driver %O %A %B %L %P' make small-test env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} diff --git a/run.sh b/run.sh index 994a84e45d..649dc5cf9f 100755 --- a/run.sh +++ b/run.sh @@ -63,16 +63,6 @@ else echo "Global config is empty" fi -if [ "$merge_drivers" == "No merge drivers set" ]; then - echo "No custom merge drivers found in global configuration. Proceeding with the evaluation." - # Include other commands to continue the script here -else - echo "Error: Custom merge drivers are set in global configuration." - echo "Please unset them before running the evaluation." - echo "Merge driver found: $merge_drivers" - exit 1 -fi - # Check if cache.tar exists and cache is missing if [ -f cache.tar ] && [ ! -d cache ]; then echo "Decompressing cache.tar" From 5a3d387e7f153347769e2398b09e616d9cdc2487 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 17:59:04 -0700 Subject: [PATCH 29/46] Style fix --- run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run.sh b/run.sh index 649dc5cf9f..df3dc94cef 100755 --- a/run.sh +++ b/run.sh @@ -55,7 +55,8 @@ done PATH=$(pwd)/src/scripts/merge_tools/:$PATH export PATH -export GIT_CONFIG_GLOBAL=$(pwd)/blanck_git_config.config +GIT_CONFIG_GLOBAL=$(pwd)/blanck_git_config.config +export GIT_CONFIG_GLOBAL if git config --list --show-origin | grep 'file:'"$GIT_CONFIG_GLOBAL" > /dev/null; then echo "Error: Global config is not empty" exit 1 From 379cbd0d89b47972e36554914ebb5485bcc70157 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 18:33:16 -0700 Subject: [PATCH 30/46] Test --- .github/workflows/check-reproducibility.yml | 1 + run.sh | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 222ff81ada..e9d37c5597 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,6 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" + python3 src/python/replay_merge.py --idx 522-15 -delete_workdir head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: diff --git a/run.sh b/run.sh index df3dc94cef..d216181bef 100755 --- a/run.sh +++ b/run.sh @@ -55,6 +55,10 @@ done PATH=$(pwd)/src/scripts/merge_tools/:$PATH export PATH +# Print current git global config +echo "Current global git config:" +git config --list --show-origin + GIT_CONFIG_GLOBAL=$(pwd)/blanck_git_config.config export GIT_CONFIG_GLOBAL if git config --list --show-origin | grep 'file:'"$GIT_CONFIG_GLOBAL" > /dev/null; then From 1469c87577db7185334765517eed97ae01ca1f71 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 18:43:45 -0700 Subject: [PATCH 31/46] Spork update --- src/scripts/merge_tools/spork.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/merge_tools/spork.sh b/src/scripts/merge_tools/spork.sh index 2753140916..e526003494 100755 --- a/src/scripts/merge_tools/spork.sh +++ b/src/scripts/merge_tools/spork.sh @@ -27,9 +27,9 @@ branch1=$2 branch2=$3 # set up spork driver -(echo "[merge \"spork\"]"; - echo " name = spork"; - echo " driver = java -jar $spork_absolutepath --git-mode %A %O %B -o %A") >> "$clone_dir/.git/config" +git config --local merge.spork.name "spork" +git config --local merge.spork.driver "java -jar $spork_absolutepath --git-mode %A %O %B -o %A" + echo "*.java merge=spork" >> "$clone_dir/.gitattributes" # perform merge From a3de05838747ba37e25b884f33d779da80d6f402 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 18:57:36 -0700 Subject: [PATCH 32/46] better gitconfig --- .gitconfig | 3 +++ blanck_git_config.config | 0 run.sh | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .gitconfig delete mode 100644 blanck_git_config.config diff --git a/.gitconfig b/.gitconfig new file mode 100644 index 0000000000..eb2a801689 --- /dev/null +++ b/.gitconfig @@ -0,0 +1,3 @@ +[user] + email = example@example.come + name = Example Example diff --git a/blanck_git_config.config b/blanck_git_config.config deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/run.sh b/run.sh index df3dc94cef..fd1cd1eea5 100755 --- a/run.sh +++ b/run.sh @@ -55,7 +55,7 @@ done PATH=$(pwd)/src/scripts/merge_tools/:$PATH export PATH -GIT_CONFIG_GLOBAL=$(pwd)/blanck_git_config.config +GIT_CONFIG_GLOBAL=$(pwd)/.gitconfig export GIT_CONFIG_GLOBAL if git config --list --show-origin | grep 'file:'"$GIT_CONFIG_GLOBAL" > /dev/null; then echo "Error: Global config is not empty" From e90c33d7c8dadd553f973516cd185124b8cc659c Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 19:20:13 -0700 Subject: [PATCH 33/46] Make sure killall is installed --- .github/workflows/small-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/small-test.yml b/.github/workflows/small-test.yml index 391a14ed2f..b907bc48a5 100644 --- a/.github/workflows/small-test.yml +++ b/.github/workflows/small-test.yml @@ -53,6 +53,8 @@ jobs: maven-version: ${{ matrix.maven }} - name: Clean caches & workspace run: make clean + - name: Install killall + run: sudo apt update && sudo apt install psmisc -y - run: echo "LOGURU_COLORIZE=NO" >> $GITHUB_ENV - name: Run small test run: | From 2eb54c350a8cb32fa7b4fc62b203048cc2d87be6 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 19:38:35 -0700 Subject: [PATCH 34/46] Fixed spork driver --- src/scripts/merge_tools/spork.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/scripts/merge_tools/spork.sh b/src/scripts/merge_tools/spork.sh index e526003494..9124fb89e3 100755 --- a/src/scripts/merge_tools/spork.sh +++ b/src/scripts/merge_tools/spork.sh @@ -26,14 +26,19 @@ clone_dir=$1 branch1=$2 branch2=$3 +cd "$clone_dir" || exit + # set up spork driver git config --local merge.spork.name "spork" git config --local merge.spork.driver "java -jar $spork_absolutepath --git-mode %A %O %B -o %A" -echo "*.java merge=spork" >> "$clone_dir/.gitattributes" +# print git config +echo "*.java merge=spork" >> .gitattributes # perform merge -cd "$clone_dir" || exit +echo "Current git config:" +git config --list + git checkout "$branch1" --force git merge --no-edit "$branch2" retVal=$? From 194432852a6de5862790fe4b67b3a2bdbd135c85 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 21:25:16 -0700 Subject: [PATCH 35/46] Create artifacts --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index e9d37c5597..cf4077e165 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,7 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - python3 src/python/replay_merge.py --idx 522-15 -delete_workdir + python3 src/python/replay_merge.py --idx 522-15 -delete_workdir -create_artifacts head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: From 52778e78cb6c997adb91848d44923a99a1d11eb3 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 22:09:45 -0700 Subject: [PATCH 36/46] Check permissions --- src/python/repo.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/python/repo.py b/src/python/repo.py index 176e4b67a4..9dc6da917c 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -242,6 +242,20 @@ def copy_repo(self) -> None: ignore_dangling_symlinks=True, ) os.system("chmod -R 777 " + str(self.local_repo_path)) + # Check if chmod worked + # Retrieve the mode (permissions) of the file/directory + mode = os.stat(str(self.local_repo_path)).st_mode + + # Mask out the permission bits with 0o777 (octal for 777) + if mode & 0o777 == 0o777: + logger.error( + f"The permissions for {str(self.local_repo_path)} are set to 777." + ) + else: + logger.error( + f"The permissions for {str(self.local_repo_path)} are not set to 777." + + f" Current permissions: {oct(mode & 0o777)}" + ) self.repo = Repo(self.local_repo_path) def checkout(self, commit: str, use_cache: bool = True) -> Tuple[bool, str]: From d0a3696bab6ebefd27b658c7a62887c9a40fb7a9 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 22:23:30 -0700 Subject: [PATCH 37/46] Preserve permissions --- src/python/replay_merge.py | 53 +++++++++----------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 88b59c2a1a..3d8af10003 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -4,6 +4,7 @@ import argparse import os import sys +import tarfile from pathlib import Path import shutil import pandas as pd @@ -24,50 +25,20 @@ def store_artifacts(result_df: pd.DataFrame) -> None: - """Store artifacts in a tarball with specific directory.""" - # Create temporary directories for the structured archive - base_dir = "archive" - if not os.path.exists(os.path.join(base_dir, "merge_replays")): - os.makedirs(os.path.join(base_dir, "merge_replays")) - if not os.path.exists(os.path.join(base_dir, "logs")): - os.makedirs(os.path.join(base_dir, "logs")) + """Store artifacts in a tarball directly fro.""" + tarball_path = "replay_merge_artifacts.tar.gz" - # Copy files to the new directory structure - for idx in result_df.index: - repo_path = result_df.loc[idx, "repo path"] - log_path = result_df.loc[idx, "merge log path"] + # Create the tarball and add files, ensuring no path modification + with tarfile.open(tarball_path, "w:gz") as tar: + for idx in result_df.index: + repo_path = result_df.loc[idx, "repo path"] + log_path = result_df.loc[idx, "merge log path"] - # Extract one level higher than the basename - repo_subdir = os.path.join( - *str(repo_path).split(os.sep)[-2:] - ) # Last two components of the path - log_subdir = os.path.basename(log_path) # Just the file name + # Add repository directories or files to the tarball with absolute paths + tar.add(repo_path, arcname=repo_path) - # Full new path creation - new_repo_path = os.path.join(base_dir, "merge_replays", repo_subdir) - new_log_path = os.path.join(base_dir, "logs", log_subdir) - - # Ensure directories exist - os.makedirs(os.path.dirname(new_repo_path), exist_ok=True) - - # Copy repository directories or files - if os.path.isdir(repo_path): - shutil.copytree(repo_path, new_repo_path) - else: - shutil.copy(repo_path, new_repo_path) - - # Copy log files - shutil.copy(log_path, new_log_path) - - # Create the tarball from the new directory structure - os.chdir( - base_dir - ) # Change directory to avoid including the 'archive/' prefix in the tarball - os.system("tar -czf ../replay_merge_artifacts.tar.gz merge_replays logs") - os.chdir("..") # Change back to the original directory - - # Clean up the temporary directory - shutil.rmtree(base_dir) + # Add log files to the tarball with absolute paths + tar.add(log_path, arcname=log_path) logger.info("Artifacts created") From 15ff2abf0ac1f356e870ee0ab9cd00c19598cae6 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 22:50:00 -0700 Subject: [PATCH 38/46] Adde res --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index cf4077e165..28c375937e 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,7 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - python3 src/python/replay_merge.py --idx 522-15 -delete_workdir -create_artifacts + python3 src/python/replay_merge.py --idx 522-15 -create_artifacts head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv make NUM_PROCESSES=1 check-merges-reproducibility env: From b33ceddd1c5e48a53b3a0a1c80e4044070a9ee38 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Mon, 6 May 2024 23:18:56 -0700 Subject: [PATCH 39/46] More finetuned java version --- .github/workflows/check-reproducibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index 28c375937e..beda1d6489 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/setup-java@v4 with: distribution: 'zulu' - java-version: 17 + java-version: 17.0.7 - run: echo "JAVA17_HOME=$JAVA_HOME" >> $GITHUB_ENV - run: java -version - uses: actions/checkout@v4 From dd2f0fb1213b42b44a701f89a9e8472474c69d56 Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Tue, 7 May 2024 11:52:36 -0700 Subject: [PATCH 40/46] Set global git config when replaying merge --- src/python/replay_merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 3d8af10003..453fb642b3 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -277,6 +277,7 @@ def merge_replay( os.environ["PATH"] += os.pathsep + os.path.join( os.getcwd(), "src/scripts/merge_tools" ) + os.environ["GIT_CONFIG_GLOBAL"] = os.getcwd() + "/.gitconfig" logger.info(f"Replaying merge with index {args.idx}") if args.delete_workdir: From 05d9f859d9f573224eebf14c979f0df6530e29ae Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Wed, 8 May 2024 09:17:16 -0700 Subject: [PATCH 41/46] Check results --- src/python/replay_merge.py | 24 +++++++++++++++++------- src/python/repo.py | 13 ------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 453fb642b3..96357f0158 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -162,13 +162,23 @@ def merge_replay( merge_fingerprint, ] assert repo.local_repo_path.exists() - if merge_result not in ( - MERGE_STATE.Merge_failed, - MERGE_STATE.Git_checkout_failed, - TEST_STATE.Git_checkout_failed, - ) and ( - merge_data[f"{merge_tool.name}_merge_fingerprint"] != merge_fingerprint - and not dont_check_fingerprints + + if ( # pylint: disable=too-many-boolean-expressions + merge_result + not in ( + MERGE_STATE.Git_checkout_failed, + TEST_STATE.Git_checkout_failed, + ) + and ( + merge_data[f"{merge_tool.name}_merge_fingerprint"] + != merge_fingerprint + and not dont_check_fingerprints + ) + and (merge_tool != MERGE_TOOL.spork) + and ( + merge_tool != MERGE_TOOL.gitmerge_resolve + or merge_result != MERGE_STATE.Merge_failed + ) ): assert repo.local_repo_path.exists() if create_artifacts: diff --git a/src/python/repo.py b/src/python/repo.py index 9dc6da917c..3424647f23 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -242,20 +242,7 @@ def copy_repo(self) -> None: ignore_dangling_symlinks=True, ) os.system("chmod -R 777 " + str(self.local_repo_path)) - # Check if chmod worked - # Retrieve the mode (permissions) of the file/directory - mode = os.stat(str(self.local_repo_path)).st_mode - # Mask out the permission bits with 0o777 (octal for 777) - if mode & 0o777 == 0o777: - logger.error( - f"The permissions for {str(self.local_repo_path)} are set to 777." - ) - else: - logger.error( - f"The permissions for {str(self.local_repo_path)} are not set to 777." - + f" Current permissions: {oct(mode & 0o777)}" - ) self.repo = Repo(self.local_repo_path) def checkout(self, commit: str, use_cache: bool = True) -> Tuple[bool, str]: From 5b8e38511770fc876da5f3dcd465664154915b5d Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Thu, 9 May 2024 09:48:13 -0700 Subject: [PATCH 42/46] Parallel merge reproducibility tests --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index afa72fe7d2..0856e1b52f 100644 --- a/Makefile +++ b/Makefile @@ -132,11 +132,9 @@ clean-local: rm -rf repos check-merges-reproducibility: - @echo "Running replay_merge for each idx in parallel..." + @echo "Running replay_merge for each idx in parallel using GNU Parallel..." @set -e; \ - tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | while read idx; do \ - python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx $$idx; \ - done + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 100% --bar python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) --delete_workdir --idx {} protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 1b0335318b2ce888d14387406cd2febc9595a76b Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Thu, 9 May 2024 09:52:30 -0700 Subject: [PATCH 43/46] Bug fix --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0856e1b52f..22beedb66a 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel using GNU Parallel..." @set -e; \ - tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 100% --bar python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) --delete_workdir --idx {} + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 100% --bar python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From b34f003bcdbbc0ce9623419141473e24fd5baeee Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Thu, 9 May 2024 11:05:45 -0700 Subject: [PATCH 44/46] Limit parallelism --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22beedb66a..df28db1073 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel using GNU Parallel..." @set -e; \ - tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 100% --bar python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 75% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + From 7a480e1cb25152781651d7374d31035d84cc927e Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Thu, 9 May 2024 12:18:28 -0700 Subject: [PATCH 45/46] Reduce number cpus --- Makefile | 2 +- src/python/replay_merge.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index df28db1073..4ee0c98848 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ clean-local: check-merges-reproducibility: @echo "Running replay_merge for each idx in parallel using GNU Parallel..." @set -e; \ - tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 75% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} + tail -n +2 $(CSV_RESULTS) | awk -F, '{print $$1}' | parallel -j 50% python3 src/python/replay_merge.py --merges_csv $(CSV_RESULTS) -delete_workdir --idx {} protect-repos: find repos -mindepth 1 -type d -exec chmod a-w {} + diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 96357f0158..e55f7484d4 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -21,7 +21,6 @@ from loguru import logger logger.add("replay_merge.log", mode="a") -logger.add(sys.stdout, colorize=True) def store_artifacts(result_df: pd.DataFrame) -> None: From 01c522556bfb415a92e9ba1238c26da853c1d12f Mon Sep 17 00:00:00 2001 From: Benedikt Schesch Date: Thu, 9 May 2024 12:28:09 -0700 Subject: [PATCH 46/46] Fix --- .github/workflows/check-reproducibility.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index beda1d6489..36f7780179 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -44,8 +44,7 @@ jobs: run: | git config --global user.email "example@example.com" git config --global user.name "Example Example" - python3 src/python/replay_merge.py --idx 522-15 -create_artifacts head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv - make NUM_PROCESSES=1 check-merges-reproducibility + make check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }}