diff --git a/src/python/diff3_analysis.ipynb b/src/python/diff3_analysis.ipynb index b730faee9c..558935f300 100644 --- a/src/python/diff3_analysis.ipynb +++ b/src/python/diff3_analysis.ipynb @@ -2,47 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "apache/commons-collections : Cloning repo\n", - "apache/commons-collections : Finished cloning\n", - "apache/commons-collections : Finished cloning\n", - "Checking out left0b8a4c7b71b682ac8417822ac693cbc8b6c261b3\n", - "Checking out right2cbac58f7e3b51a4f2f3a6672bb4380e18469c50\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Switched to branch 'TEMP_LEFT_BRANCH'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running: git merge --no-edit -s ort TEMP_RIGHT_BRANCH\n", - "Auto-merging src/main/java/org/apache/commons/collections4/CollectionUtils.java\n", - "CONFLICT (content): Merge conflict in src/main/java/org/apache/commons/collections4/CollectionUtils.java\n", - "Auto-merging src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java\n", - "CONFLICT (content): Merge conflict in src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java\n", - "Automatic merge failed; fix conflicts and then commit the result.\n", - "Conflict\n", - "\n", - "apache/commons-collections : Cloning repo\n", - "apache/commons-collections : Finished cloning\n", - "apache/commons-collections : Finished cloning\n" - ] - } - ], + "outputs": [], "source": [ - "row_num = 184\n", + "row_num = 1444\n", "merge_tool = \"gitmerge_ort\"\n", "# merge_tool = \"gitmerge_ort_adjacent\"\n", "# merge_tool = \"gitmerge_ort_ignorespace\"\n", @@ -59,8 +23,7 @@ "# merge_tool = \"intellimerge\"\n", "\n", "from diff3_analysis import diff3_analysis\n", - "from diff3_analysis_no_base import diff3_analysis_no_base\n", - "diff3_analysis_no_base(merge_tool, row_num)" + "diff3_analysis(merge_tool, row_num)" ] } ], diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py index ce1220c98d..b1aeba9123 100644 --- a/src/python/diff3_analysis.py +++ b/src/python/diff3_analysis.py @@ -1,5 +1,6 @@ """Runs a merge and uses diff3 to compare it to the base and final branch of a given repo. """ + import subprocess import re import os @@ -12,34 +13,44 @@ # pylint: disable-msg=too-many-locals -def diff3_analysis(merge_tool: str, repo_num: int): +def diff3_analysis(merge_tool: str, results_index: int): """ Analyzes merge conflicts using the diff3 tool and opens the results in the default text viewer. Args: merge_tool (str): The merge tool to be used. - repo_num (int): The index of the repository in the results DataFrame. + results_index (int): The index of the repository in the results DataFrame. Returns: None """ + + # Deletes base, programmer_merge, and merge_attempt folders in repos dir + # We do this to prevent errors if cloning the same repo into the folder twice shutil.rmtree("./repos", ignore_errors=True) + # Ensure the base output directory exists + base_output_dir = "./merge_conflict_analysis_diffs" + # Create a subdirectory for this specific results_index + repo_output_dir = os.path.join(base_output_dir, f"index_{results_index}") + os.makedirs(repo_output_dir, exist_ok=True) + + # Retrieve left and right branch from hash in repo df = pd.read_csv("../../results_greatest_hits/result.csv") - repo_name = df.iloc[repo_num]["repository"] + repo_name = df.iloc[results_index]["repository"] script = "../scripts/merge_tools/" + merge_tool + ".sh" repo = clone_repo_to_path( repo_name, "./repos/merge_attempt" ) # Return a Git-Python repo object repo.remote().fetch() - left_sha = df.iloc[repo_num]["left"] + left_sha = df.iloc[results_index]["left"] repo.git.checkout(left_sha, force=True) print("Checking out left" + left_sha) repo.submodule_update() repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) - repo.git.checkout(df.iloc[repo_num]["right"], force=True) - print("Checking out right" + df.iloc[repo_num]["right"]) + repo.git.checkout(df.iloc[results_index]["right"], force=True) + print("Checking out right" + df.iloc[results_index]["right"]) repo.submodule_update() repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) @@ -56,13 +67,13 @@ def diff3_analysis(merge_tool: str, repo_num: int): ) print("Found base sha" + base_sha.stdout) - repo = clone_repo_to_path( + repo2 = clone_repo_to_path( repo_name, "./repos/base" ) # Return a Git-Python repo object - repo.remote().fetch() + repo2.remote().fetch() base_sha = base_sha.stdout.strip() - repo.git.checkout(base_sha, force=True) - repo.submodule_update() + repo2.git.checkout(base_sha, force=True) + repo2.submodule_update() result = subprocess.run( [ @@ -81,12 +92,11 @@ def diff3_analysis(merge_tool: str, repo_num: int): print(result.stdout) - repo = clone_repo_to_path( + repo3 = clone_repo_to_path( repo_name, "./repos/programmer_merge" ) # Return a Git-Python repo object - repo.git.checkout(df.iloc[repo_num]["merge"], force=True) - repo.submodule_update() - + repo3.git.checkout(df.iloc[results_index]["merge"], force=True) + repo3.submodule_update() for conflict_file_match in conflict_file_matches: conflicting_file = str(conflict_file_match) @@ -126,32 +136,14 @@ def diff3_analysis(merge_tool: str, repo_num: int): text=True, ) - # Use a temporary file to store the diff results - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: - temp_file.write(diff_results.stdout) - - # Open the saved text file with the default application - subprocess.run(["xdg-open", temp_file.name], check=True) + # Generate a filename for the diff result, including the new subdirectory + diff_filename = os.path.join( + repo_output_dir, f"diff_{os.path.basename(conflicting_file)}.txt" + ) - # Delete the temporary file - os.remove(temp_file.name) + # Write the diff results to the file + with open(diff_filename, "w") as diff_file: + diff_file.write(diff_results.stdout) - # Deletes base, programmer_merge, and merge_attempt folders in repos dir - # We do this to prevent errors if cloning the same repo into the folder twice - ''' - subprocess.run( - ["rm", "-rf", "./repos/base"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - subprocess.run( - ["rm", "-rf", "./repos/merge_attempt"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - subprocess.run( - ["rm", "-rf", "./repos/programmer_merge"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - ''' + # Optionally, print or log the path of the diff file + print(f"Diff results saved to {diff_filename}") diff --git a/src/python/diff3_analysis_no_base.py b/src/python/diff3_analysis_no_base.py deleted file mode 100644 index 99fede9b28..0000000000 --- a/src/python/diff3_analysis_no_base.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Runs a merge and uses diff3 to compare it to the base and final branch of a given repo. -""" -import subprocess -import re -import os -import shutil -import tempfile -import pandas as pd -from repo import clone_repo_to_path -from merge_tester import MERGE_STATE - -# pylint: disable-msg=too-many-locals - - -def diff3_analysis_no_base(merge_tool: str, repo_num: int): - """ - Analyzes merge conflicts using the diff3 tool and opens the results in the default text viewer. - - Args: - merge_tool (str): The merge tool to be used. - repo_num (int): The index of the repository in the results DataFrame. - - Returns: - None - """ - shutil.rmtree("./repos", ignore_errors=True) - - df = pd.read_csv("../../results_greatest_hits/result.csv") - repo_name = df.iloc[repo_num]["repository"] - - script = "../scripts/merge_tools/" + merge_tool + ".sh" - repo = clone_repo_to_path( - repo_name, "./repos/merge_attempt" - ) # Return a Git-Python repo object - repo.remote().fetch() - left_sha = df.iloc[repo_num]["left"] - repo.git.checkout(left_sha, force=True) - print("Checking out left" + left_sha) - repo.submodule_update() - repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) - repo.git.checkout(df.iloc[repo_num]["right"], force=True) - print("Checking out right" + df.iloc[repo_num]["right"]) - repo.submodule_update() - repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) - - - result = subprocess.run( - [ - script, - repo.git.rev_parse("--show-toplevel"), - "TEMP_LEFT_BRANCH", - "TEMP_RIGHT_BRANCH", - ], - stdout=subprocess.PIPE, - text=True, - ) - - conflict_file_matches = re.findall( - r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout - ) - - print(result.stdout) - - repo = clone_repo_to_path( - repo_name, "./repos/programmer_merge" - ) # Return a Git-Python repo object - repo.git.checkout(df.iloc[repo_num]["merge"], force=True) - repo.submodule_update() - - - for conflict_file_match in conflict_file_matches: - conflicting_file = str(conflict_file_match) - conflict_path = os.path.join(repo_name, conflicting_file) - conflict_path_merge_attempt = os.path.join( - "./repos/merge_attempt", conflict_path - ) - - conflict_path_programmer_merge = os.path.join( - "./repos/programmer_merge", conflict_path - ) - - diff_results = subprocess.run( - [ - "diff", - conflict_path_merge_attempt, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - - # Use a temporary file to store the diff results - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: - temp_file.write(diff_results.stdout) - - # Open the saved text file with the default application - subprocess.run(["xdg-open", temp_file.name], check=True) - - # Delete the temporary file - os.remove(temp_file.name) - - # Deletes base, programmer_merge, and merge_attempt folders in repos dir - # We do this to prevent errors if cloning the same repo into the folder twice - ''' - subprocess.run( - ["rm", "-rf", "./repos/base"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - subprocess.run( - ["rm", "-rf", "./repos/merge_attempt"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - subprocess.run( - ["rm", "-rf", "./repos/programmer_merge"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - '''