Skip to content

Commit

Permalink
Improvements to replay_merge.py and select_from_results.py (#293)
Browse files Browse the repository at this point in the history
  • Loading branch information
mernst authored May 16, 2024
1 parent d77ff05 commit 4c7426d
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 11 deletions.
58 changes: 55 additions & 3 deletions src/python/replay_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import tarfile
from pathlib import Path
import shutil
import subprocess
import pandas as pd
from repo import Repository, MERGE_TOOL, TEST_STATE, MERGE_STATE
from variables import TIMEOUT_TESTING_MERGE, N_TESTS, WORKDIR_DIRECTORY
Expand Down Expand Up @@ -90,6 +91,57 @@ def merge_replay(
f"Replaying {repo_slug} {merge_data['left']} {merge_data['right']}",
total=len(MERGE_TOOL),
)

# Get base, left, right, and programmer merge.

workdir = Path(f"{repo_slug}-merge-input-left")
if not (WORKDIR_DIRECTORY / workdir).exists():
repo = Repository(
repo_slug,
cache_directory=Path("no_cache/"),
workdir_id=workdir,
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["left"])

workdir = Path(f"{repo_slug}-merge-input-right")
if not (WORKDIR_DIRECTORY / workdir).exists():
repo = Repository(
repo_slug,
cache_directory=Path("no_cache/"),
workdir_id=workdir,
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["right"])

workdir = Path(f"{repo_slug}-merge-input-base")
if not (WORKDIR_DIRECTORY / workdir).exists():
repo = Repository(
repo_slug,
cache_directory=Path("no_cache/"),
workdir_id=workdir,
delete_workdir=False,
lazy_clone=False,
)
base_commit = subprocess.run(
["git", "merge-base", merge_data["left"], merge_data["right"]],
stdout=subprocess.PIPE,
).stdout.decode("utf-8")
repo.checkout(base_commit)

workdir = Path(f"{repo_slug}-merge-input-programmer")
if not (WORKDIR_DIRECTORY / workdir).exists():
repo = Repository(
repo_slug,
cache_directory=Path("no_cache/"),
workdir_id=workdir,
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["merge"])

for merge_tool in MERGE_TOOL:
progress.update(task, advance=1)
workdir = Path(
Expand All @@ -104,19 +156,19 @@ def merge_replay(
if (WORKDIR_DIRECTORY / workdir).exists():
# Ask the user if they want to delete the workdir
logger.info(
f"workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}"
f"Workdir {WORKDIR_DIRECTORY / workdir} already exists for idx: {merge_idx}"
)
if delete_workdir:
answer = "y"
else:
answer = input(
f"workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)"
f"Workdir {workdir} exists for idx: {merge_idx}. Delete it? (y/n)"
)
if answer == "y":
shutil.rmtree(WORKDIR_DIRECTORY / workdir)
else:
logger.info(
f"workdir {WORKDIR_DIRECTORY/workdir} already exists. Skipping"
f"Workdir {WORKDIR_DIRECTORY/workdir} already exists. Skipping."
)
continue
try:
Expand Down
1 change: 0 additions & 1 deletion src/python/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ def copy_repo(self) -> None:
ignore_dangling_symlinks=True,
)
os.system("chmod -R 777 " + str(self.local_repo_path))

self.repo = Repo(self.local_repo_path)

def checkout(self, commit: str, use_cache: bool = True) -> Tuple[bool, str]:
Expand Down
32 changes: 25 additions & 7 deletions src/python/select_from_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

"""Output a subset of the results, to standard out.
The arguments are a query and a list of columns.
The arguments are a query and an optional list of columns.
The query is executed (to select rows), then columns are output that include:
* idx
* all the columns that appear in the query
Expand All @@ -13,9 +13,14 @@
Here are example invocations:
select_from_results.py '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")'
select_from_results.py '(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")'
The resulting .csv is useful for manual examination but cannot be passed to
`replay_merge.py` because that requires a .csv file with all tools and all
fingerprints.
"""

import argparse
import os
from os import system
import re
import tempfile
Expand All @@ -25,9 +30,9 @@
def columns_in_query(query):
    """Return the identifiers (column names) used in the query.

    Args:
        query: A query expression string, e.g.
            '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")'.

    Returns:
        A list of identifier names in order of appearance.  Duplicates are
        kept, so an identifier used twice in the query appears twice.
        The operator keywords `and`, `or`, `not`, and `in` are never returned.
    """
    # Blank out quoted string literals first, so that words inside a
    # multi-word string such as "Merge failed badly" are not mistaken for
    # identifiers.
    without_strings = re.sub(r"""(['"]).*?\1""", " ", query)
    # Identifiers may contain digits after the first character (e.g. "tool2").
    identifiers = re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", without_strings)
    # Operator keywords are part of the query syntax, not column names.
    operator_keywords = {"and", "or", "not", "in"}
    return [name for name in identifiers if name not in operator_keywords]

Expand All @@ -44,8 +49,11 @@ def main():
description="Outputs a subset of the results, to standard out",
)
parser.add_argument("query")
scriptdir = os.path.dirname(os.path.realpath(__file__))
parser.add_argument(
"--input", action="store", default="../../results/combined/result.csv"
"--input",
action="store",
default=scriptdir + "/" + "../../results/combined/result.csv",
)
parser.add_argument("columns", nargs=argparse.REMAINDER)
args = parser.parse_args()
Expand All @@ -57,16 +65,26 @@ def main():

# Select some columns
columns_to_select = (
["idx", "repo-idx", "merge-idx", "branch_name", "merge", "left", "right"]
[
"idx",
"repo-idx",
"merge-idx",
"branch_name",
"merge",
"left",
"left_tree_fingerprint",
"right",
"right_tree_fingerprint",
]
+ columns_in_query(args.query)
+ args.columns
+ ["repository"]
)
df = df[columns_to_select]

# Gross way to produce output to standard out
with tempfile.TemporaryFile() as tmpfile:
with tempfile.NamedTemporaryFile() as tmpfile:
df.to_csv(tmpfile)
print(tmpfile.name)
system("cat " + tmpfile.name)


Expand Down

0 comments on commit 4c7426d

Please sign in to comment.