Skip to content

Commit

Permalink
diff3 changes on new dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
cactusbranch01 committed Dec 14, 2023
1 parent bf922fe commit 9809519
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 33 deletions.
29 changes: 20 additions & 9 deletions src/python/diff3_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,41 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dropwizard/metrics : Cloning repo\n",
"dropwizard/metrics : Finished cloning\n",
"dropwizard/metrics : Finished cloning\n",
"229c8236f0764ab62887afccd4dcb4928ed6de5c\n",
"\n",
"dropwizard/metrics : Cloning repo\n",
"dropwizard/metrics : Finished cloning\n",
"dropwizard/metrics : Finished cloning\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Switched to branch 'TEMP_LEFT_BRANCH'\n",
"diff: ./repos/merge_attempt/dropwizard/metrics/pom.xml: No such file or directory\n",
"diff: ./repos/programmer_merge/dropwizard/metrics/pom.xml: No such file or directory\n"
"ename": "GitCommandError",
"evalue": "Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mGitCommandError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 18\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_adjacent\"\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_ignorespace\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_imports\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# merge_tool = \"spork\"\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# merge_tool = \"intellimerge\"\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdiff3_analysis\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m diff3_analysis\n\u001b[0;32m---> 18\u001b[0m \u001b[43mdiff3_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerge_tool\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow_num\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/GitHub/AST-Merging-Ben-Analysis/src/python/diff3_analysis.py:60\u001b[0m, in \u001b[0;36mdiff3_analysis\u001b[0;34m(merge_tool, repo_num)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28mprint\u001b[39m(base_sha\u001b[38;5;241m.\u001b[39mstdout)\n\u001b[1;32m 57\u001b[0m repo \u001b[38;5;241m=\u001b[39m clone_repo_to_path(\n\u001b[1;32m 58\u001b[0m repo_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./repos/base\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 59\u001b[0m ) \u001b[38;5;66;03m# Return a Git-Python repo object\u001b[39;00m\n\u001b[0;32m---> 60\u001b[0m \u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheckout\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_sha\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstdout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m repo\u001b[38;5;241m.\u001b[39msubmodule_update()\n\u001b[1;32m 63\u001b[0m result \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(\n\u001b[1;32m 64\u001b[0m [\n\u001b[1;32m 65\u001b[0m script,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 71\u001b[0m text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 72\u001b[0m )\n",
"File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:736\u001b[0m, in \u001b[0;36mGit.__getattr__.<locals>.<lambda>\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 734\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m LazyMixin\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name)\n\u001b[0;32m--> 736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_process\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1316\u001b[0m, in \u001b[0;36mGit._call_process\u001b[0;34m(self, method, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1313\u001b[0m call\u001b[38;5;241m.\u001b[39mappend(dashify(method))\n\u001b[1;32m 1314\u001b[0m call\u001b[38;5;241m.\u001b[39mextend(args_list)\n\u001b[0;32m-> 1316\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcall\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mexec_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1111\u001b[0m, in \u001b[0;36mGit.execute\u001b[0;34m(self, command, istream, with_extended_output, with_exceptions, as_process, output_stream, stdout_as_string, kill_after_timeout, with_stdout, universal_newlines, shell, env, max_chunk_size, strip_newline_in_stdout, **subprocess_kwargs)\u001b[0m\n\u001b[1;32m 1108\u001b[0m \u001b[38;5;66;03m# END handle debug printing\u001b[39;00m\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m with_exceptions \u001b[38;5;129;01mand\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1111\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m GitCommandError(redacted_command, status, stderr_value, stdout_value)\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(stdout_value, \u001b[38;5;28mbytes\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m stdout_as_string: \u001b[38;5;66;03m# could also be output_stream\u001b[39;00m\n\u001b[1;32m 1114\u001b[0m stdout_value \u001b[38;5;241m=\u001b[39m safe_decode(stdout_value)\n",
"\u001b[0;31mGitCommandError\u001b[0m: Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'"
]
}
],
"source": [
"repo_num = 548\n",
"row_num = 548\n",
"merge_tool = \"gitmerge_ort\"\n",
"# merge_tool = \"gitmerge_ort_adjacent\"\n",
"# merge_tool = \"gitmerge_ort_ignorespace\"\n",
Expand All @@ -42,7 +53,7 @@
"# merge_tool = \"intellimerge\"\n",
"\n",
"from diff3_analysis import diff3_analysis\n",
"diff3_analysis(merge_tool, repo_num)"
"diff3_analysis(merge_tool, row_num)"
]
}
],
Expand Down
58 changes: 36 additions & 22 deletions src/python/diff3_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import subprocess
import re
import os
import shutil
import tempfile
import pandas as pd
from repo import clone_repo_to_path
Expand All @@ -22,6 +23,8 @@ def diff3_analysis(merge_tool: str, repo_num: int):
Returns:
None
"""
shutil.rmtree("./repos", ignore_errors=True)

df = pd.read_csv("../../results_greatest_hits/result.csv")
repo_name = df.iloc[repo_num]["repository"]

Expand All @@ -38,6 +41,25 @@ def diff3_analysis(merge_tool: str, repo_num: int):
repo.submodule_update()
repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True)


# Ask git for the common ancestor (merge base) of the two temporary parent
# branches that were checked out in the merge_attempt clone.
base_sha = subprocess.run(
    [
        "git",
        "merge-base",
        "TEMP_LEFT_BRANCH",
        "TEMP_RIGHT_BRANCH",
    ],
    cwd="./repos/merge_attempt/" + repo_name,
    stdout=subprocess.PIPE,
    text=True,
)
# `git merge-base` terminates its output with a newline. Passing that raw
# string to `git checkout` makes git look for the pathspec "<sha>\n" and fail
# with "did not match any file(s) known to git", so strip it first.
base_commit = base_sha.stdout.strip()
if not base_commit:
    # Without a merge base there is nothing to check out; fail with a clear
    # message instead of an obscure git pathspec error.
    raise RuntimeError(
        "git merge-base returned no commit for " + repo_name
    )
print(base_commit)
repo = clone_repo_to_path(
    repo_name, "./repos/base"
)  # Returns a GitPython repo object
repo.git.checkout(base_commit, force=True)
repo.submodule_update()

result = subprocess.run(
[
script,
Expand All @@ -53,19 +75,14 @@ def diff3_analysis(merge_tool: str, repo_num: int):
r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout
)

print(result.stdout)

repo = clone_repo_to_path(
repo_name, "./repos/programmer_merge"
) # Return a Git-Python repo object
repo.git.checkout(df.iloc[repo_num]["merge"], force=True)
repo.submodule_update()

'''
repo = clone_repo_to_path(
repo_name, "./repos/base"
) # Return a Git-Python repo object
repo.git.checkout(df.iloc[repo_num]["base"], force=True)
repo.submodule_update()
'''

for conflict_file_match in conflict_file_matches:
conflicting_file = str(conflict_file_match)
Expand All @@ -74,13 +91,11 @@ def diff3_analysis(merge_tool: str, repo_num: int):
"./repos/merge_attempt", conflict_path
)

'''
conflict_path_base = os.path.join("./repos/base", conflict_path)
'''
conflict_path_programmer_merge = os.path.join(
"./repos/programmer_merge", conflict_path
)
'''

diff_results = subprocess.run(
[
"diff3",
Expand All @@ -96,17 +111,16 @@ def diff3_analysis(merge_tool: str, repo_num: int):
# Check that diff3 didn't run into missing files in the base
error_message = "No such file or directory"
if error_message in diff_results.stderr:
'''
# Since the conflict file was added in both parents we can't diff the base.
diff_results = subprocess.run(
[
"diff",
conflict_path_merge_attempt,
conflict_path_programmer_merge,
],
stdout=subprocess.PIPE,
text=True,
)
# Since the conflict file was added in both parents we can't diff the base.
diff_results = subprocess.run(
[
"diff",
conflict_path_merge_attempt,
conflict_path_programmer_merge,
],
stdout=subprocess.PIPE,
text=True,
)

# Use a temporary file to store the diff results
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
Expand All @@ -126,7 +140,6 @@ def diff3_analysis(merge_tool: str, repo_num: int):
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
)
'''
subprocess.run(
["rm", "-rf", "./repos/merge_attempt"],
stderr=subprocess.PIPE,
Expand All @@ -137,3 +150,4 @@ def diff3_analysis(merge_tool: str, repo_num: int):
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
)
'''
2 changes: 1 addition & 1 deletion src/python/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def clone_repo_to_path(repo_slug: str, path: str) -> git.repo.Repo:
Args:
repo_slug (str): The slug of the repository, which is "owner/reponame".
"""
repo_dir = REPOS_PATH / Path(repo_slug)
repo_dir = Path(path) / Path(repo_slug)
if repo_dir.exists():
repo = git.repo.Repo(repo_dir)
else:
Expand Down
1 change: 0 additions & 1 deletion src/scripts/merge_tools/gitmerge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ retVal=$?
# report conflicts
if [ $retVal -ne 0 ]; then
echo "Conflict"
git merge --abort
fi

exit $retVal

0 comments on commit 9809519

Please sign in to comment.