diff --git a/src/python/README.md b/src/python/README.md new file mode 100644 index 0000000000..1d83ea921d --- /dev/null +++ b/src/python/README.md @@ -0,0 +1,51 @@ +# Python Scripts for Merge Conflict Analysis + + + + +This directory contains Python scripts designed to facilitate the analysis of merge conflicts using various merge tools. The scripts allow users to recreate merges, analyze conflicts, and compare different merge algorithms' effectiveness. + + + + +## Scripts Overview + + + + +- `diff3_analysis.py`: This script analyzes merge conflicts for two merge tools on a given conflict. The tool that failed to merge should come first as an argument. + + + + +## Prerequisites + + + + +- Python 3.x installed on your system. +- Necessary Python packages installed (e.g., `pandas`, `GitPython`). + + + + +## Usage + + + + +### Analyzing a Single Merge Conflict + + + + +To analyze a conflict by comparing two merge tools: + + +python3 diff3_analysis.py MERGE_TOOL1 MERGE_TOOL2 RESULTS_INDEX REPO_OUTPUT_DIR + + + +Example: + +python3 diff3_analysis.py "gitmerge_ort" "spork" 32 "./mixed_results_spork" \ No newline at end of file diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py index eaa9d0b534..626dd4196e 100644 --- a/src/python/diff3_analysis.py +++ b/src/python/diff3_analysis.py @@ -1,5 +1,5 @@ -"""Runs a merge and uses diff3 to compare it to the base and final branch of a given repo. -See readme for details on shell scripts to run these methods. +""" +Recreates a merge and outputs the diff files for two algorithms for comparison on a given conflict. 
""" import sys @@ -10,48 +10,120 @@ import shutil import tempfile import pandas as pd -from repo import clone_repo +from repo import clone_repo_to_path from merge_tester import MERGE_STATE + # pylint: disable-msg=too-many-locals +# pylint: disable-msg=too-many-arguments + + +def setup_environment(): + """Remove repository directories to clean up the environment.""" + shutil.rmtree("./repos", ignore_errors=True) -def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir): +def clone_and_checkout(repo_name, branch_sha, clone_dir): """ - Analyzes merge conflicts using the diff3 tool and opens the results in the default text viewer. + Clone a repository to a specified path and checkout a given SHA. + + + Args: + repo_name (str): The repository to clone. + branch_sha (str): The SHA commit or branch to checkout. + clone_dir (str): Directory path to clone the repository. + + + Returns: + repo (GitPython.repo): The cloned repository object. + """ + repo = clone_repo_to_path(repo_name, clone_dir) + repo.remote().fetch() + repo.git.checkout(branch_sha, force=True) + repo.submodule_update() + return repo + + +def process_diff( + tool_name, base_path, attempt_path, merge_path, output_dir, index, filename +): + """ + Process the diff between files and save the output to a designated file. + + Args: + tool_name (str): Identifier for the merge tool. + base_path (str): Path to the base file. + attempt_path (str): Path to the merge attempt file. + merge_path (str): Path to the manually merged file. + output_dir (str): Directory where results will be saved. + index (int): Index of the repository in the results list. + filename (str): Base name for the output file. 
+ """ + # Run diff3 or fall back to diff if files are missing + diff_results = subprocess.run( + ["diff3", base_path, attempt_path, merge_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if "No such file or directory" in diff_results.stderr: + diff_results = subprocess.run( + ["diff", attempt_path, merge_path], stdout=subprocess.PIPE, text=True + ) + + # Prepare the output filename + diff_filename = os.path.join( + output_dir, str(index), tool_name, f"diff_{filename}.txt" + ) + os.makedirs( + os.path.dirname(diff_filename), exist_ok=True + ) # Ensure the directory exists + + # Write the diff results to the file + with open(diff_filename, "w") as diff_file: + diff_file.write(diff_results.stdout) + print(f"Diff results saved to {diff_filename}") + +def diff3_analysis( + merge_tool1: str, merge_tool2: str, results_index: int, repo_output_dir +): + """ + Analyzes merge conflicts using the diff3 tool and opens the results in the default text viewer. Args: - merge_tool (str): The merge tool to be used. - results_index (int): The index of the repository in the results DataFrame. + merge_tool1 (str): The merge tool that Merge_failed (tool name as written in spreadsheet) + merge_tool2 (str): The merge tool that Failed_tests or Passed_tests + results_index (int): The index of the repository in the results spreadsheet. 
repo_output_dir (path): The path of where we want to store the results from the analysis + Returns: None """ # Deletes base, programmer_merge, and merge_attempt folders in repos dir # We do this to prevent errors if cloning the same repo into the folder twice - shutil.rmtree("./repos", ignore_errors=True) + setup_environment() # Retrieve left and right branch from hash in repo df = pd.read_csv("../../results/combined/result.csv") repo_name = df.iloc[results_index]["repository"] - script = "../scripts/merge_tools/" + merge_tool + ".sh" - repo = clone_repo( + script = "../scripts/merge_tools/" + merge_tool1 + ".sh" + repo = clone_repo_to_path( repo_name, "./repos/merge_attempt1" ) # Return a Git-Python repo object repo.remote().fetch() left_sha = df.iloc[results_index]["left"] repo.git.checkout(left_sha, force=True) - print("Checking out left" + left_sha) repo.submodule_update() repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) repo.git.checkout(df.iloc[results_index]["right"], force=True) - print("Checking out right" + df.iloc[results_index]["right"]) repo.submodule_update() repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) + print("Checked out left and right") + # Clone the base base_sha = subprocess.run( [ "git", @@ -64,13 +136,10 @@ def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir): text=True, ) print("Found base sha" + base_sha.stdout) - - repo2 = clone_repo(repo_name, "./repos/base") # Return a Git-Python repo object - repo2.remote().fetch() base_sha = base_sha.stdout.strip() - repo2.git.checkout(base_sha, force=True) - repo2.submodule_update() + repo2 = clone_and_checkout(repo_name, base_sha, "./repos/base") + # Recreate the merge result = subprocess.run( [ script, @@ -87,103 +156,103 @@ def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir): ) print(result.stdout) - repo3 = clone_repo( - repo_name, "./repos/programmer_merge" - ) # Return a Git-Python repo object - 
repo3.git.checkout(df.iloc[results_index]["merge"], force=True) - repo3.submodule_update() + if conflict_file_matches == []: + print("No conflict files to search") + return + repo3 = clone_and_checkout( + repo_name, df.iloc[results_index]["merge"], "./repos/programmer_merge" + ) print(conflict_file_matches) + script = "../scripts/merge_tools/" + merge_tool2 + ".sh" + repo4 = clone_repo_to_path( + repo_name, "./repos/merge_attempt2" + ) # Return a Git-Python repo object + repo4.remote().fetch() + left_sha = df.iloc[results_index]["left"] + repo4.git.checkout(left_sha, force=True) + print("Checking out left" + left_sha) + repo4.submodule_update() + repo4.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) + repo4.git.checkout(df.iloc[results_index]["right"], force=True) + print("Checking out right" + df.iloc[results_index]["right"]) + repo4.submodule_update() + repo4.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) + for conflict_file_match in conflict_file_matches: conflicting_file = str(conflict_file_match) conflict_path = os.path.join(repo_name, conflicting_file) - conflict_path_merge_attempt = os.path.join( + conflict_file_base, _ = os.path.splitext(os.path.basename(conflicting_file)) + + # Paths for the first merge attempt + conflict_path_merge_attempt1 = os.path.join( "./repos/merge_attempt1", conflict_path ) - conflict_path_base = os.path.join("./repos/base", conflict_path) conflict_path_programmer_merge = os.path.join( "./repos/programmer_merge", conflict_path ) - diff_results = subprocess.run( - [ - "diff3", - conflict_path_base, - conflict_path_merge_attempt, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, + # Process the first merge attempt + process_diff( + merge_tool1, + conflict_path_base, + conflict_path_merge_attempt1, + conflict_path_programmer_merge, + repo_output_dir, + results_index, + conflict_file_base, ) - # Check that diff3 didn't run into missing files in the base - error_message = "No 
such file or directory" - if error_message in diff_results.stderr: - # Since the conflict file was added in both parents we can't diff the base. - diff_results = subprocess.run( - [ - "diff", - conflict_path_merge_attempt, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - text=True, - ) - - # Remove ._ at the end of the file name that will mess things up - conflicting_file_base, _ = os.path.splitext(os.path.basename(conflicting_file)) - - # Generate a filename for the diff result, including the new subdirectory - diff_filename = os.path.join( - repo_output_dir, str(results_index), f"diff_{conflicting_file_base}.txt" - ) + """ + BREAK + """ - # Extract the directory part from diff_filename - output_dir = os.path.dirname(diff_filename) - - # Ensure the output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Write the diff results to the file - with open(diff_filename, "w") as diff_file: - diff_file.write(diff_results.stdout) + # Paths for the second merge attempt + conflict_path_merge_attempt2 = os.path.join( + "./repos/merge_attempt2", conflict_path + ) - # Optionally, print or log the path of the diff file - print(f"Diff results saved to {diff_filename}") + # Process the second merge attempt + process_diff( + merge_tool2, + conflict_path_base, + conflict_path_merge_attempt2, + conflict_path_programmer_merge, + repo_output_dir, + results_index, + conflict_file_base, + ) -def main(merge_tool: str, results_index: int, repo_output_dir: str): +def main(): """ - Entry point for the script when run from the command line. + Parses arguments and calls diff3_analysis from the CLI """ - # Convert results_index to int here if using argparse - diff3_analysis(merge_tool, results_index, repo_output_dir) - - -if __name__ == "__main__": - # Use argparse to parse command line arguments parser = argparse.ArgumentParser( - description="Analyze merge conflicts using the diff3 tool." + description="Process and compare merge conflicts using two tools." 
) - parser.add_argument("merge_tool", type=str, help="The merge tool to be used.") + parser.add_argument("merge_tool1", type=str, help="The first merge tool to use") + parser.add_argument("merge_tool2", type=str, help="The second merge tool to use") parser.add_argument( "results_index", type=int, - help="The index of the repository in the results DataFrame.", + help="The index of the repository in the results spreadsheet", ) parser.add_argument( - "repo_output_dir", - type=str, - help="The path of where we want to store the results from the analysis.", + "repo_output_dir", type=str, help="The directory to store the results" ) args = parser.parse_args() - # Ensure the output directory exists - os.makedirs(args.repo_output_dir, exist_ok=True) + diff3_analysis( + merge_tool1=args.merge_tool1, + merge_tool2=args.merge_tool2, + results_index=args.results_index, + repo_output_dir=args.repo_output_dir, + ) + - # Call main function with parsed arguments - main(args.merge_tool, args.results_index, args.repo_output_dir) +if __name__ == "__main__": + main() diff --git a/src/python/diff3_pair_analysis.py b/src/python/diff3_pair_analysis.py deleted file mode 100644 index b6588fc23b..0000000000 --- a/src/python/diff3_pair_analysis.py +++ /dev/null @@ -1,239 +0,0 @@ -"""Runs a merge and uses diff3 to compare it to the base and final branch of a given repo. -""" - -import sys -import argparse -import subprocess -import re -import os -import shutil -import tempfile -import pandas as pd -from repo import clone_repo_to_path -from merge_tester import MERGE_STATE - -# pylint: disable-msg=too-many-locals -# pylint: disable-msg=too-many-statements - - -def diff3_pair_analysis( - merge_tool1: str, merge_tool2: str, results_index: int, repo_output_dir -): - """ - Analyzes merge conflicts using the diff3 tool and opens the results in the default text viewer. - - Args: - merge_tool (str): The merge tool to be used. - results_index (int): The index of the repository in the results DataFrame. 
- repo_output_dir (path): The path of where we want to store the results from the analysis - - Returns: - None - """ - - # Deletes base, programmer_merge, and merge_attempt folders in repos dir - # We do this to prevent errors if cloning the same repo into the folder twice - shutil.rmtree("./repos", ignore_errors=True) - - # Retrieve left and right branch from hash in repo - df = pd.read_csv("../../results/combined/result.csv") - repo_name = df.iloc[results_index]["repository"] - - script = "../scripts/merge_tools/" + merge_tool1 + ".sh" - repo = clone_repo_to_path( - repo_name, "./repos/merge_attempt1" - ) # Return a Git-Python repo object - repo.remote().fetch() - left_sha = df.iloc[results_index]["left"] - repo.git.checkout(left_sha, force=True) - print("Checking out left" + left_sha) - repo.submodule_update() - repo.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) - repo.git.checkout(df.iloc[results_index]["right"], force=True) - print("Checking out right" + df.iloc[results_index]["right"]) - repo.submodule_update() - repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) - - base_sha = subprocess.run( - [ - "git", - "merge-base", - "TEMP_LEFT_BRANCH", - "TEMP_RIGHT_BRANCH", - ], - cwd="./repos/merge_attempt1/" + repo_name, - stdout=subprocess.PIPE, - text=True, - ) - print("Found base sha" + base_sha.stdout) - - repo2 = clone_repo_to_path( - repo_name, "./repos/base" - ) # Return a Git-Python repo object - repo2.remote().fetch() - base_sha = base_sha.stdout.strip() - repo2.git.checkout(base_sha, force=True) - repo2.submodule_update() - - result = subprocess.run( - [ - script, - repo.git.rev_parse("--show-toplevel"), - "TEMP_LEFT_BRANCH", - "TEMP_RIGHT_BRANCH", - ], - stdout=subprocess.PIPE, - text=True, - ) - - conflict_file_matches = re.findall( - r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout - ) - print(result.stdout) - - if conflict_file_matches == []: - print("No conflict files to search") - return - - repo3 = clone_repo_to_path( - 
repo_name, "./repos/programmer_merge" - ) # Return a Git-Python repo object - repo3.git.checkout(df.iloc[results_index]["merge"], force=True) - repo3.submodule_update() - - print(conflict_file_matches) - - script = "../scripts/merge_tools/" + merge_tool2 + ".sh" - repo4 = clone_repo_to_path( - repo_name, "./repos/merge_attempt2" - ) # Return a Git-Python repo object - repo4.remote().fetch() - left_sha = df.iloc[results_index]["left"] - repo4.git.checkout(left_sha, force=True) - print("Checking out left" + left_sha) - repo4.submodule_update() - repo4.git.checkout("-b", "TEMP_LEFT_BRANCH", force=True) - repo4.git.checkout(df.iloc[results_index]["right"], force=True) - print("Checking out right" + df.iloc[results_index]["right"]) - repo4.submodule_update() - repo4.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) - - for conflict_file_match in conflict_file_matches: - conflicting_file = str(conflict_file_match) - conflict_path = os.path.join(repo_name, conflicting_file) - conflict_path_merge_attempt1 = os.path.join( - "./repos/merge_attempt1", conflict_path - ) - - conflict_path_base = os.path.join("./repos/base", conflict_path) - conflict_path_programmer_merge = os.path.join( - "./repos/programmer_merge", conflict_path - ) - - diff_results = subprocess.run( - [ - "diff3", - conflict_path_base, - conflict_path_merge_attempt1, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - - # Check that diff3 didn't run into missing files in the base - error_message = "No such file or directory" - if error_message in diff_results.stderr: - # Since the conflict file was added in both parents we can't diff the base. 
- diff_results = subprocess.run( - [ - "diff", - conflict_path_merge_attempt1, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - text=True, - ) - - # Remove ._ at the end of the file name that will mess things up - conflicting_file_base, _ = os.path.splitext(os.path.basename(conflicting_file)) - - # Generate a filename for the diff result, including the new subdirectory - diff_filename = os.path.join( - repo_output_dir, - str(results_index), - merge_tool1, - f"diff_{conflicting_file_base}.txt", - ) - - # Extract the directory part from diff_filename - output_dir = os.path.dirname(diff_filename) - - # Ensure the output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Write the diff results to the file - with open(diff_filename, "w") as diff_file: - diff_file.write(diff_results.stdout) - - # Optionally, print or log the path of the diff file - print(f"Diff results saved to {diff_filename}") - - """ - - BREAK - - """ - - conflict_path = os.path.join(repo_name, conflicting_file) - conflict_path_merge_attempt2 = os.path.join( - "./repos/merge_attempt2", conflict_path - ) - - diff_results = subprocess.run( - [ - "diff3", - conflict_path_base, - conflict_path_merge_attempt2, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - - # Check that diff3 didn't run into missing files in the base - error_message = "No such file or directory" - if error_message in diff_results.stderr: - # Since the conflict file was added in both parents we can't diff the base. 
- diff_results = subprocess.run( - [ - "diff", - conflict_path_merge_attempt2, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - text=True, - ) - - # Generate a filename for the diff result, including the new subdirectory - diff_filename = os.path.join( - repo_output_dir, - str(results_index), - merge_tool2, - f"diff_{conflicting_file_base}.txt", - ) - - # Extract the directory part from diff_filename - output_dir = os.path.dirname(diff_filename) - - # Ensure the output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Write the diff results to the file - with open(diff_filename, "w") as diff_file: - diff_file.write(diff_results.stdout) - - # Optionally, print or log the path of the diff file - print(f"Diff results saved to {diff_filename}") diff --git a/src/python/high_level_analysis.ipynb b/src/python/high_level_analysis.ipynb new file mode 100644 index 0000000000..2b326c9318 --- /dev/null +++ b/src/python/high_level_analysis.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1181 entries, 0 to 1180\n", + "Data columns (total 45 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 idx 1181 non-null object\n", + " 1 repo-idx 1181 non-null int64 \n", + " 2 merge-idx 1181 non-null int64 \n", + " 3 branch_name 1181 non-null object\n", + " 4 merge 1181 non-null object\n", + " 5 left 1181 non-null object\n", + " 6 right 1181 non-null object\n", + " 7 notes 1 non-null object\n", + " 8 test merge 1181 non-null bool \n", + " 9 diff contains java file 1181 non-null bool \n", + " 10 left_tree_fingerprint 1181 non-null object\n", + " 11 left parent test result 1181 non-null object\n", + " 12 right_tree_fingerprint 1181 non-null object\n", + " 13 right parent test result 1181 non-null object\n", + " 14 parents pass 1181 non-null bool \n", + " 
15 sampled for testing 1181 non-null bool \n", + " 16 gitmerge_ort 1181 non-null object\n", + " 17 gitmerge_ort_merge_fingerprint 1181 non-null object\n", + " 18 gitmerge_ort_ignorespace 1181 non-null object\n", + " 19 gitmerge_ort_ignorespace_merge_fingerprint 1181 non-null object\n", + " 20 gitmerge_recursive_histogram 1181 non-null object\n", + " 21 gitmerge_recursive_histogram_merge_fingerprint 1181 non-null object\n", + " 22 gitmerge_recursive_ignorespace 1181 non-null object\n", + " 23 gitmerge_recursive_ignorespace_merge_fingerprint 1181 non-null object\n", + " 24 gitmerge_recursive_minimal 1181 non-null object\n", + " 25 gitmerge_recursive_minimal_merge_fingerprint 1181 non-null object\n", + " 26 gitmerge_recursive_myers 1181 non-null object\n", + " 27 gitmerge_recursive_myers_merge_fingerprint 1181 non-null object\n", + " 28 gitmerge_recursive_patience 1181 non-null object\n", + " 29 gitmerge_recursive_patience_merge_fingerprint 1181 non-null object\n", + " 30 gitmerge_resolve 1181 non-null object\n", + " 31 gitmerge_resolve_merge_fingerprint 1181 non-null object\n", + " 32 gitmerge_ort_adjacent 1181 non-null object\n", + " 33 gitmerge_ort_adjacent_merge_fingerprint 1181 non-null object\n", + " 34 gitmerge_ort_imports 1181 non-null object\n", + " 35 gitmerge_ort_imports_merge_fingerprint 1181 non-null object\n", + " 36 gitmerge_ort_imports_ignorespace 1181 non-null object\n", + " 37 gitmerge_ort_imports_ignorespace_merge_fingerprint 1181 non-null object\n", + " 38 git_hires_merge 1181 non-null object\n", + " 39 git_hires_merge_merge_fingerprint 1181 non-null object\n", + " 40 spork 1181 non-null object\n", + " 41 spork_merge_fingerprint 1181 non-null object\n", + " 42 intellimerge 1181 non-null object\n", + " 43 intellimerge_merge_fingerprint 1181 non-null object\n", + " 44 repository 1181 non-null object\n", + "dtypes: bool(4), int64(2), object(39)\n", + "memory usage: 383.0+ KB\n", + "1046\n", + "ebay/xcelite : Cloning repo\n", + "ebay/xcelite : Finished 
cloning\n", + "ebay/xcelite : Finished cloning\n", + "Checked out left and right\n", + "Found base sha64931b26ccb11d738f22ab50ff2db6ca1070a16c\n", + "\n", + "ebay/xcelite : Cloning repo\n", + "ebay/xcelite : Finished cloning\n", + "ebay/xcelite : Finished cloning\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Switched to branch 'TEMP_LEFT_BRANCH'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running: git merge --no-edit -s ort TEMP_RIGHT_BRANCH\n", + "Auto-merging src/main/java/com/ebay/xcelite/Xcelite.java\n", + "CONFLICT (content): Merge conflict in src/main/java/com/ebay/xcelite/Xcelite.java\n", + "Auto-merging src/main/java/com/ebay/xcelite/options/XceliteOptions.java\n", + "Auto-merging src/main/java/com/ebay/xcelite/reader/BeanSheetReader.java\n", + "CONFLICT (content): Merge conflict in src/main/java/com/ebay/xcelite/reader/BeanSheetReader.java\n", + "Auto-merging src/main/java/com/ebay/xcelite/reader/SimpleSheetReader.java\n", + "CONFLICT (content): Merge conflict in src/main/java/com/ebay/xcelite/reader/SimpleSheetReader.java\n", + "Auto-merging src/test/java/com/ebay/xcelite/reader/AnyColumnTest.java\n", + "CONFLICT (content): Merge conflict in src/test/java/com/ebay/xcelite/reader/AnyColumnTest.java\n", + "Automatic merge failed; fix conflicts and then commit the result.\n", + "Conflict\n", + "\n", + "ebay/xcelite : Cloning repo\n", + "ebay/xcelite : Finished cloning\n", + "ebay/xcelite : Finished cloning\n", + "['src/main/java/com/ebay/xcelite/Xcelite.java', 'src/main/java/com/ebay/xcelite/reader/BeanSheetReader.java', 'src/main/java/com/ebay/xcelite/reader/SimpleSheetReader.java', 'src/test/java/com/ebay/xcelite/reader/AnyColumnTest.java']\n", + "ebay/xcelite : Cloning repo\n", + "ebay/xcelite : Finished cloning\n", + "ebay/xcelite : Finished cloning\n", + "Checking out leftbbccda650a890b30b8d5c0d8a6d66f03410fd4bd\n", + "Checking out right9d35e9ae77230c74d655add3c90aaf8f12b235b9\n", + "Diff 
results saved to ./mixed_results_resolve/1046/gitmerge_ort/diff_Xcelite.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/spork/diff_Xcelite.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/gitmerge_ort/diff_BeanSheetReader.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/spork/diff_BeanSheetReader.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/gitmerge_ort/diff_SimpleSheetReader.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/spork/diff_SimpleSheetReader.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/gitmerge_ort/diff_AnyColumnTest.txt\n", + "Diff results saved to ./mixed_results_resolve/1046/spork/diff_AnyColumnTest.txt\n", + "243\n", + "javaparser/javaparser : Cloning repo\n", + "javaparser/javaparser : Finished cloning\n" + ] + } + ], + "source": [ + "import subprocess\n", + "import re\n", + "import os\n", + "import tempfile\n", + "import pandas as pd\n", + "\n", + "from merge_tester import MERGE_STATE\n", + "from diff3_analysis import diff3_analysis\n", + "from diff3_analysis import diff3_analysis\n", + "\n", + "df = pd.read_csv('../../results/greatest_hits/result.csv')\n", + "\n", + "df.info()\n", + "\n", + "shuffled_df = df.sample(frac=1, random_state=42)\n", + "\n", + "# Select rows where gitmerge_ort (column 16) failed to merge and spork (column 40) passed tests\n", + "repo_output_dir = os.path.join(\"./mixed_results_resolve\")\n", + "os.makedirs(repo_output_dir, exist_ok=True)\n", + "\n", + "# Iterate over rows\n", + "count = 0\n", + "total = 0\n", + "for index, row in shuffled_df.iterrows():\n", + " total += 1\n", + " if row.iloc[16] == \"Merge_failed\" and row.iloc[40] == \"Tests_passed\":\n", + " print(index)\n", + " count += 1\n", + " diff3_analysis(\"gitmerge_ort\", \"spork\", index, repo_output_dir)\n", + "print(\"count:\")\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"brunoribeiro/sql-parser : Cloning repo\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "Checked out left and right\n", + "Found base shac75b2c654691d95a9f6adf899137047bf7b12611\n", + "\n", + "brunoribeiro/sql-parser : Cloning repo\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "brunoribeiro/sql-parser : Finished cloning\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Switched to branch 'TEMP_LEFT_BRANCH'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running: git merge --no-edit -s ort TEMP_RIGHT_BRANCH\n", + "Auto-merging src/main/java/com/akiban/sql/parser/NodeFactoryImpl.java\n", + "Auto-merging src/main/java/com/akiban/sql/parser/NodeNames.java\n", + "Auto-merging src/main/java/com/akiban/sql/parser/NodeTypes.java\n", + "CONFLICT (content): Merge conflict in src/main/java/com/akiban/sql/parser/NodeTypes.java\n", + "Auto-merging src/main/java/com/akiban/sql/unparser/NodeToString.java\n", + "CONFLICT (content): Merge conflict in src/main/java/com/akiban/sql/unparser/NodeToString.java\n", + "Auto-merging src/main/javacc/SQLGrammar.jj\n", + "Automatic merge failed; fix conflicts and then commit the result.\n", + "Conflict\n", + "\n", + "brunoribeiro/sql-parser : Cloning repo\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "['src/main/java/com/akiban/sql/parser/NodeTypes.java', 'src/main/java/com/akiban/sql/unparser/NodeToString.java']\n", + "brunoribeiro/sql-parser : Cloning repo\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "brunoribeiro/sql-parser : Finished cloning\n", + "Checking out left492a69a136036053b88bc77083ba382c0c4cd38a\n", + "Checking out right1f2b87746e5e36bf83b4e68b5cc42a5540fcbfd9\n", + "Diff results saved to ./mixed_results_resolve/32/gitmerge_ort/diff_NodeTypes.txt\n", + "Diff results saved to ./mixed_results_resolve/32/spork/diff_NodeTypes.txt\n", + 
"Diff results saved to ./mixed_results_resolve/32/gitmerge_ort/diff_NodeToString.txt\n", + "Diff results saved to ./mixed_results_resolve/32/spork/diff_NodeToString.txt\n" + ] + } + ], + "source": [ + "diff3_analysis(\"gitmerge_ort\", \"spork\", 32, repo_output_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "research", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/python/readme.md b/src/python/readme.md deleted file mode 100644 index 8ea9316799..0000000000 --- a/src/python/readme.md +++ /dev/null @@ -1,85 +0,0 @@ -# Python Scripts for Merge Conflict Analysis - - - - -This directory contains Python scripts designed to facilitate the analysis of merge conflicts using various merge tools. The scripts allow users to recreate merges, analyze conflicts, and compare different merge algorithms' effectiveness. - - - - -## Scripts Overview - - - - -- `diff3_analysis.py`: This script analyzes merge conflicts for a single specified merge tool and commit. -- `run_diff3_analysis.py`: This script automates the analysis across multiple commits and merge tools, aggregating the results. - - - - -## Prerequisites - - - - -- Python 3.x installed on your system. -- Necessary Python packages installed (e.g., `pandas`, `GitPython`). 
- - - - -## Usage - - - - -### Analyzing a Single Merge Conflict - - - - -To analyze merge conflicts using a specific merge tool for a single commit: - - -python3 diff3_analysis.py - - - - -Ex: - - -python3 diff3_analysis.py gitmerge_ort 582 ./merge_conflict_analysis_diffs/582/gitmerge_ort - - - - -: The merge tool to use for the analysis (e.g., gitmerge_ort). -: The index of the commit in the dataset. -: The directory where the analysis results will be saved. - - - - -Running Bulk Analysis -To run the analysis over multiple commits and all merge tools: - - -python3 run_analysis.py --results_index --repo_output_dir "" - - - - -Ex: - - -python3 run_diff3_analysis.py --results_index 582,427,930 --repo_output_dir "./merge_conflict_analysis_diffs" - - -: Comma-separated list of commit indices to analyze. Example: 582,427,930. -: The directory where the bulk analysis results will be saved. - - - diff --git a/src/python/repo.py b/src/python/repo.py index d2d47c52ac..aed5e00aef 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -93,6 +93,8 @@ def clone_repo(repo_slug: str, repo_dir: Path) -> git.repo.Repo: ) from None +# Alternative clone repo method that returns git repo object for diff3 scripts +@timeout(10 * 60) def clone_repo_to_path(repo_slug: str, path: str) -> git.repo.Repo: """Clones a repository, or runs `git fetch` if the repository is already cloned. Args: