diff --git a/src/python/diff3_analysis.ipynb b/src/python/diff3_analysis.ipynb index 36700392ce..b730faee9c 100644 --- a/src/python/diff3_analysis.ipynb +++ b/src/python/diff3_analysis.ipynb @@ -2,41 +2,47 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "dropwizard/metrics : Cloning repo\n", - "dropwizard/metrics : Finished cloning\n", - "dropwizard/metrics : Finished cloning\n", - "229c8236f0764ab62887afccd4dcb4928ed6de5c\n", - "\n", - "dropwizard/metrics : Cloning repo\n", - "dropwizard/metrics : Finished cloning\n", - "dropwizard/metrics : Finished cloning\n" + "apache/commons-collections : Cloning repo\n", + "apache/commons-collections : Finished cloning\n", + "apache/commons-collections : Finished cloning\n", + "Checking out left0b8a4c7b71b682ac8417822ac693cbc8b6c261b3\n", + "Checking out right2cbac58f7e3b51a4f2f3a6672bb4380e18469c50\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Switched to branch 'TEMP_LEFT_BRANCH'\n" ] }, { - "ename": "GitCommandError", - "evalue": "Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mGitCommandError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 18\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_adjacent\"\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_ignorespace\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_imports\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m 
\u001b[38;5;66;03m# merge_tool = \"spork\"\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# merge_tool = \"intellimerge\"\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdiff3_analysis\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m diff3_analysis\n\u001b[0;32m---> 18\u001b[0m \u001b[43mdiff3_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerge_tool\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow_num\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/GitHub/AST-Merging-Ben-Analysis/src/python/diff3_analysis.py:60\u001b[0m, in \u001b[0;36mdiff3_analysis\u001b[0;34m(merge_tool, repo_num)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28mprint\u001b[39m(base_sha\u001b[38;5;241m.\u001b[39mstdout)\n\u001b[1;32m 57\u001b[0m repo \u001b[38;5;241m=\u001b[39m clone_repo_to_path(\n\u001b[1;32m 58\u001b[0m repo_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./repos/base\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 59\u001b[0m ) \u001b[38;5;66;03m# Return a Git-Python repo object\u001b[39;00m\n\u001b[0;32m---> 60\u001b[0m \u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheckout\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_sha\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstdout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m repo\u001b[38;5;241m.\u001b[39msubmodule_update()\n\u001b[1;32m 63\u001b[0m result \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(\n\u001b[1;32m 64\u001b[0m [\n\u001b[1;32m 65\u001b[0m script,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 71\u001b[0m text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 72\u001b[0m )\n", - "File 
\u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:736\u001b[0m, in \u001b[0;36mGit.__getattr__..\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 734\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m LazyMixin\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name)\n\u001b[0;32m--> 736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_process\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1316\u001b[0m, in \u001b[0;36mGit._call_process\u001b[0;34m(self, method, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1313\u001b[0m call\u001b[38;5;241m.\u001b[39mappend(dashify(method))\n\u001b[1;32m 1314\u001b[0m call\u001b[38;5;241m.\u001b[39mextend(args_list)\n\u001b[0;32m-> 1316\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcall\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mexec_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1111\u001b[0m, in 
\u001b[0;36mGit.execute\u001b[0;34m(self, command, istream, with_extended_output, with_exceptions, as_process, output_stream, stdout_as_string, kill_after_timeout, with_stdout, universal_newlines, shell, env, max_chunk_size, strip_newline_in_stdout, **subprocess_kwargs)\u001b[0m\n\u001b[1;32m 1108\u001b[0m \u001b[38;5;66;03m# END handle debug printing\u001b[39;00m\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m with_exceptions \u001b[38;5;129;01mand\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1111\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m GitCommandError(redacted_command, status, stderr_value, stdout_value)\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(stdout_value, \u001b[38;5;28mbytes\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m stdout_as_string: \u001b[38;5;66;03m# could also be output_stream\u001b[39;00m\n\u001b[1;32m 1114\u001b[0m stdout_value \u001b[38;5;241m=\u001b[39m safe_decode(stdout_value)\n", - "\u001b[0;31mGitCommandError\u001b[0m: Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'" + "name": "stdout", + "output_type": "stream", + "text": [ + "Running: git merge --no-edit -s ort TEMP_RIGHT_BRANCH\n", + "Auto-merging src/main/java/org/apache/commons/collections4/CollectionUtils.java\n", + "CONFLICT (content): Merge conflict in src/main/java/org/apache/commons/collections4/CollectionUtils.java\n", + "Auto-merging src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java\n", + "CONFLICT (content): Merge conflict in src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java\n", + "Automatic merge failed; fix conflicts and then commit the result.\n", + "Conflict\n", + "\n", + "apache/commons-collections : Cloning repo\n", + 
"apache/commons-collections : Finished cloning\n", + "apache/commons-collections : Finished cloning\n" ] } ], "source": [ - "row_num = 548\n", + "row_num = 184\n", "merge_tool = \"gitmerge_ort\"\n", "# merge_tool = \"gitmerge_ort_adjacent\"\n", "# merge_tool = \"gitmerge_ort_ignorespace\"\n", @@ -53,7 +59,8 @@ "# merge_tool = \"intellimerge\"\n", "\n", "from diff3_analysis import diff3_analysis\n", - "diff3_analysis(merge_tool, row_num)" + "from diff3_analysis_no_base import diff3_analysis_no_base\n", + "diff3_analysis_no_base(merge_tool, row_num)" ] } ], diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py index ef61265f76..ce1220c98d 100644 --- a/src/python/diff3_analysis.py +++ b/src/python/diff3_analysis.py @@ -35,13 +35,14 @@ def diff3_analysis(merge_tool: str, repo_num: int): repo.remote().fetch() left_sha = df.iloc[repo_num]["left"] repo.git.checkout(left_sha, force=True) + print("Checking out left" + left_sha) repo.submodule_update() repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) repo.git.checkout(df.iloc[repo_num]["right"], force=True) + print("Checking out right" + df.iloc[repo_num]["right"]) repo.submodule_update() repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) - base_sha = subprocess.run( [ "git", @@ -53,11 +54,14 @@ def diff3_analysis(merge_tool: str, repo_num: int): stdout=subprocess.PIPE, text=True, ) - print(base_sha.stdout) + print("Found base sha" + base_sha.stdout) + repo = clone_repo_to_path( repo_name, "./repos/base" ) # Return a Git-Python repo object - repo.git.checkout(base_sha.stdout, force=True) + repo.remote().fetch() + base_sha = base_sha.stdout.strip() + repo.git.checkout(base_sha, force=True) repo.submodule_update() result = subprocess.run( diff --git a/src/python/diff3_analysis_no_base.py b/src/python/diff3_analysis_no_base.py new file mode 100644 index 0000000000..99fede9b28 --- /dev/null +++ b/src/python/diff3_analysis_no_base.py @@ -0,0 +1,121 @@ +"""Runs a merge and uses diff3 to compare it 
"""Runs a merge and diffs each conflicting file against the programmer's merge.
"""
import subprocess
import re
import os
import shutil
import tempfile
import pandas as pd
from repo import clone_repo_to_path
from merge_tester import MERGE_STATE

# pylint: disable-msg=too-many-locals


def diff3_analysis_no_base(merge_tool: str, repo_num: int) -> None:
    """
    Re-runs a merge and shows how its result differs from the programmer's merge.

    The left and right commits of the selected row are checked out into the
    temporary branches TEMP_LEFT_BRANCH and TEMP_RIGHT_BRANCH inside
    ./repos/merge_attempt and merged with the script of the requested merge
    tool. Every file the script reports as conflicting is then compared with
    plain ``diff`` (no base version is involved) against the same file in a
    second clone, ./repos/programmer_merge, checked out at the row's merge
    commit. Each diff is written to a temporary file and opened with
    ``xdg-open``.

    Args:
        merge_tool (str): The merge tool to be used; selects
            ``../scripts/merge_tools/<merge_tool>.sh``.
        repo_num (int): The positional index of the repository row in the
            results DataFrame.

    Returns:
        None

    Raises:
        subprocess.CalledProcessError: If ``xdg-open`` exits with a non-zero
            status. The temporary file is removed in that case as well.
    """
    # Remove earlier clones first; this prevents errors when the same repo
    # would otherwise be cloned into an existing folder.
    shutil.rmtree("./repos", ignore_errors=True)

    df = pd.read_csv("../../results_greatest_hits/result.csv")
    row = df.iloc[repo_num]
    repo_name = row["repository"]
    left_sha = row["left"]
    right_sha = row["right"]

    script = "../scripts/merge_tools/" + merge_tool + ".sh"
    repo = clone_repo_to_path(
        repo_name, "./repos/merge_attempt"
    )  # Return a Git-Python repo object
    repo.remote().fetch()

    # Pin both parents of the merge to named branches so the merge script can
    # refer to them by branch name.
    repo.git.checkout(left_sha, force=True)
    print("Checking out left" + left_sha)
    repo.submodule_update()
    repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True)
    repo.git.checkout(right_sha, force=True)
    print("Checking out right" + right_sha)
    repo.submodule_update()
    repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True)

    # The exit status of the script is not inspected; conflicts are detected
    # from the text it prints on stdout.
    result = subprocess.run(
        [
            script,
            repo.git.rev_parse("--show-toplevel"),
            "TEMP_LEFT_BRANCH",
            "TEMP_RIGHT_BRANCH",
        ],
        stdout=subprocess.PIPE,
        text=True,
    )

    conflict_file_matches = re.findall(
        r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout
    )

    print(result.stdout)

    repo = clone_repo_to_path(
        repo_name, "./repos/programmer_merge"
    )  # Return a Git-Python repo object
    repo.git.checkout(row["merge"], force=True)
    repo.submodule_update()

    for conflicting_file in conflict_file_matches:
        # NOTE(review): assumes clone_repo_to_path places the working tree at
        # <path>/<repo_name> -- confirm against repo.py.
        conflict_path = os.path.join(repo_name, conflicting_file)
        conflict_path_merge_attempt = os.path.join(
            "./repos/merge_attempt", conflict_path
        )
        conflict_path_programmer_merge = os.path.join(
            "./repos/programmer_merge", conflict_path
        )

        diff_results = subprocess.run(
            [
                "diff",
                conflict_path_merge_attempt,
                conflict_path_programmer_merge,
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        report = diff_results.stdout
        if diff_results.returncode > 1:
            # diff exits with 0 (identical) or 1 (different); anything larger
            # means trouble, e.g. the file is missing from one of the clones.
            # Show the reason instead of an unexplained empty report.
            report += diff_results.stderr

        # Use a temporary file to store the diff results
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
            temp_file.write(report)

        try:
            # Open the saved text file with the default application
            # NOTE(review): xdg-open may return before the viewer has read the
            # file -- confirm that removing it right away is safe.
            subprocess.run(["xdg-open", temp_file.name], check=True)
        finally:
            # Delete the temporary file even when the viewer could not start
            os.remove(temp_file.name)