Skip to content

Commit

Permalink
extend-ben-dataset branch changes
Browse files Browse the repository at this point in the history
  • Loading branch information
cactusbranch01 committed Dec 11, 2023
1 parent 48642bb commit bf922fe
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 0 deletions.
70 changes: 70 additions & 0 deletions src/python/diff3_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dropwizard/metrics : Cloning repo\n",
"dropwizard/metrics : Finished cloning\n",
"dropwizard/metrics : Finished cloning\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Switched to branch 'TEMP_LEFT_BRANCH'\n",
"diff: ./repos/merge_attempt/dropwizard/metrics/pom.xml: No such file or directory\n",
"diff: ./repos/programmer_merge/dropwizard/metrics/pom.xml: No such file or directory\n"
]
}
],
"source": [
"repo_num = 548\n",
"merge_tool = \"gitmerge_ort\"\n",
"# merge_tool = \"gitmerge_ort_adjacent\"\n",
"# merge_tool = \"gitmerge_ort_ignorespace\"\n",
"# merge_tool = \"gitmerge_ort_imports\"\n",
"# merge_tool = \"gitmerge_ort_imports_ignorespace\"\n",
"# merge_tool = \"gitmerge_resolve\"\n",
"# merge_tool = \"gitmerge_recursive_histogram\"\n",
"# merge_tool = \"gitmerge_recursive_ignorespace\"\n",
"# merge_tool = \"gitmerge_recursive_minimal\"\t\n",
"# merge_tool = \"gitmerge_recursive_myers\"\n",
"# merge_tool = \"gitmerge_recursive_patience\"\n",
"# merge_tool = \"git_hires_merge\"\n",
"# merge_tool = \"spork\"\n",
"# merge_tool = \"intellimerge\"\n",
"\n",
"from diff3_analysis import diff3_analysis\n",
"diff3_analysis(merge_tool, repo_num)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "research",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
139 changes: 139 additions & 0 deletions src/python/diff3_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Runs a merge and uses diff3 to compare it to the base and final branch of a given repo.
"""
import os
import re
import shutil
import subprocess
import tempfile

import pandas as pd

from repo import clone_repo_to_path
from merge_tester import MERGE_STATE

# pylint: disable=too-many-locals


def diff3_analysis(merge_tool: str, repo_num: int):
    """
    Re-runs a recorded merge with a merge tool and shows how the result differs
    from the programmer's merge.

    The left and right parents of the merge are checked out into temporary
    branches of a fresh clone, the merge tool script is run on them, and every
    file the tool reports as conflicting is diffed against the same file in the
    programmer's merge commit. Each diff is opened in the default text viewer.

    NOTE: the three-way `diff3` comparison against the merge base is currently
    disabled; only a two-way `diff` (merge attempt vs. programmer merge) is run.

    Args:
        merge_tool (str): The merge tool to be used.
        repo_num (int): The index of the repository in the results DataFrame.
    Returns:
        None
    """
    row = pd.read_csv("../../results_greatest_hits/result.csv").iloc[repo_num]
    repo_name = row["repository"]
    script = "../scripts/merge_tools/" + merge_tool + ".sh"

    merge_attempt_dir = "./repos/merge_attempt"
    programmer_merge_dir = "./repos/programmer_merge"

    try:
        # Clone used to replay the merge with the selected tool.
        repo = clone_repo_to_path(repo_name, merge_attempt_dir)
        repo.remote().fetch()
        repo.git.checkout(row["left"], force=True)
        repo.submodule_update()
        repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True)
        repo.git.checkout(row["right"], force=True)
        repo.submodule_update()
        repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True)

        # No `check=True`: the script's exit status is deliberately not
        # inspected; conflicts are detected from its stdout below.
        result = subprocess.run(
            [
                script,
                repo.git.rev_parse("--show-toplevel"),
                "TEMP_LEFT_BRANCH",
                "TEMP_RIGHT_BRANCH",
            ],
            stdout=subprocess.PIPE,
            text=True,
        )
        conflict_files = re.findall(
            r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout
        )

        # Second clone holding the merge as the programmer committed it.
        repo = clone_repo_to_path(repo_name, programmer_merge_dir)
        repo.git.checkout(row["merge"], force=True)
        repo.submodule_update()

        for conflicting_file in conflict_files:
            conflict_path = os.path.join(repo_name, str(conflicting_file))
            conflict_path_merge_attempt = os.path.join(
                merge_attempt_dir, conflict_path
            )
            conflict_path_programmer_merge = os.path.join(
                programmer_merge_dir, conflict_path
            )

            diff_results = subprocess.run(
                [
                    "diff",
                    conflict_path_merge_attempt,
                    conflict_path_programmer_merge,
                ],
                stdout=subprocess.PIPE,
                text=True,
            )

            # Use a temporary file to store the diff results
            with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
                temp_file.write(diff_results.stdout)

            try:
                # Open the saved text file with the default application
                # NOTE(review): xdg-open may return before the viewer has read
                # the file, in which case the removal below races with it.
                subprocess.run(["xdg-open", temp_file.name], check=True)
            finally:
                # Delete the temporary file even if the viewer failed to start
                os.remove(temp_file.name)
    finally:
        # Deletes the programmer_merge and merge_attempt folders in repos dir.
        # We do this (even on failure) to prevent errors if cloning the same
        # repo into the folder twice.
        shutil.rmtree(merge_attempt_dir, ignore_errors=True)
        shutil.rmtree(programmer_merge_dir, ignore_errors=True)
28 changes: 28 additions & 0 deletions src/python/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,34 @@ def clone_repo(repo_slug: str) -> git.repo.Repo:
return repo


def clone_repo_to_path(repo_slug: str, path: str) -> git.repo.Repo:
    """Clones a repository below `path`, or reuses the clone if it already exists.

    Args:
        repo_slug (str): The slug of the repository, which is "owner/reponame".
        path (str): The directory to clone into; the working tree ends up in
            `path/owner/reponame`.
    Returns:
        git.repo.Repo: The repository located at `path/owner/reponame`.
    Raises:
        Exception: Any error raised while cloning, fetching or updating
            submodules is printed and re-raised unchanged.
    """
    # Honor the requested destination so that several independent clones of
    # the same repository can coexist in different directories.
    repo_dir = Path(path) / Path(repo_slug)
    if repo_dir.exists():
        return git.repo.Repo(repo_dir)

    repo_dir.parent.mkdir(parents=True, exist_ok=True)
    os.environ["GIT_TERMINAL_PROMPT"] = "0"
    print(repo_slug, " : Cloning repo")
    # ":@" in URL ensures that we are not prompted for login details
    # for the repos that are now private.
    github_url = "https://:@github.com/" + repo_slug + ".git"
    try:
        repo = git.repo.Repo.clone_from(github_url, repo_dir)
        print(repo_slug, " : Finished cloning")
        repo.remote().fetch()
        # Also fetch pull-request heads so commits that only exist on PRs
        # can be checked out.
        repo.remote().fetch("refs/pull/*/head:refs/remotes/origin/pull/*")
        repo.submodule_update()
    except Exception as e:
        print(repo_slug, "Exception during cloning:\n", e)
        raise
    return repo


TEST_STATE = Enum(
"TEST_STATE",
[
Expand Down

0 comments on commit bf922fe

Please sign in to comment.