From 4c7426d16b74249b5821e4c39b93b25c6139fda0 Mon Sep 17 00:00:00 2001 From: Michael Ernst Date: Thu, 16 May 2024 11:38:42 -0700 Subject: [PATCH] Improvements to `replay_merge.py` and `select_from_results.py` (#293) --- src/python/replay_merge.py | 58 +++++++++++++++++++++++++++++-- src/python/repo.py | 1 - src/python/select_from_results.py | 32 +++++++++++++---- 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index 4fa2375fcc..adb6d24ebd 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -12,6 +12,7 @@ import tarfile from pathlib import Path import shutil +import subprocess import pandas as pd from repo import Repository, MERGE_TOOL, TEST_STATE, MERGE_STATE from variables import TIMEOUT_TESTING_MERGE, N_TESTS, WORKDIR_DIRECTORY @@ -90,6 +91,57 @@ def merge_replay( f"Replaying {repo_slug} {merge_data['left']} {merge_data['right']}", total=len(MERGE_TOOL), ) + + # Get base, left, right, and programmer merge. + + workdir = Path(f"{repo_slug}-merge-input-left") + if not (WORKDIR_DIRECTORY / workdir).exists(): + repo = Repository( + repo_slug, + cache_directory=Path("no_cache/"), + workdir_id=workdir, + delete_workdir=False, + lazy_clone=False, + ) + repo.checkout(merge_data["left"]) + + workdir = Path(f"{repo_slug}-merge-input-right") + if not (WORKDIR_DIRECTORY / workdir).exists(): + repo = Repository( + repo_slug, + cache_directory=Path("no_cache/"), + workdir_id=workdir, + delete_workdir=False, + lazy_clone=False, + ) + repo.checkout(merge_data["right"]) + + workdir = Path(f"{repo_slug}-merge-input-base") + if not (WORKDIR_DIRECTORY / workdir).exists(): + repo = Repository( + repo_slug, + cache_directory=Path("no_cache/"), + workdir_id=workdir, + delete_workdir=False, + lazy_clone=False, + ) + base_commit = subprocess.run( + ["git", "merge-base", merge_data["left"], merge_data["right"]], + stdout=subprocess.PIPE, + ).stdout.decode("utf-8") + repo.checkout(base_commit) + + workdir = Path(f"{repo_slug}-merge-input-programmer") + if not (WORKDIR_DIRECTORY / workdir).exists(): + repo = Repository( + repo_slug, + cache_directory=Path("no_cache/"), + workdir_id=workdir, + delete_workdir=False, + lazy_clone=False, + ) + repo.checkout(merge_data["merge"]) + for merge_tool in MERGE_TOOL: progress.update(task, advance=1) workdir = Path( @@ -104,19 +156,19 @@ def merge_replay( if (WORKDIR_DIRECTORY / workdir).exists(): # Ask the user if they want to delete the workdir logger.info( - f"workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}" + f"Workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}" ) if delete_workdir: answer = "y" else: answer = input( - f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)" + f"Workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)" ) if answer == "y": shutil.rmtree(WORKDIR_DIRECTORY / workdir) else: logger.info( - f"workdir {WORKDIR_DIRECTORY/workdir} already exists. Skipping" + f"Workdir {WORKDIR_DIRECTORY/workdir} already exists. Skipping." ) continue try: diff --git a/src/python/repo.py b/src/python/repo.py index 3424647f23..176e4b67a4 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -242,7 +242,6 @@ def copy_repo(self) -> None: ignore_dangling_symlinks=True, ) os.system("chmod -R 777 " + str(self.local_repo_path)) - self.repo = Repo(self.local_repo_path) def checkout(self, commit: str, use_cache: bool = True) -> Tuple[bool, str]: diff --git a/src/python/select_from_results.py b/src/python/select_from_results.py index fe7ffbf60d..6c1f3b8733 100755 --- a/src/python/select_from_results.py +++ b/src/python/select_from_results.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """Output a subset of the results, to standard out. -The arguments are a query and a list of columns. +The arguments are a query and an optional list of columns. The query is executed (to select rows), then columns are output that include: * idx * all the columns that appear in the query @@ -13,9 +13,14 @@ Here are example invocations: select_from_results.py '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")' select_from_results.py '(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")' + +The resulting .csv is useful for manual examination but cannot be passed to +`replay_merge.py` because that requires a .csv file with all tools and all +fingerprints. """ import argparse +import os from os import system import re import tempfile @@ -25,9 +30,9 @@ def columns_in_query(query): """Returns all the identifiers used in the query.""" result = re.findall(r"""(?