diff --git a/src/python/README b/src/python/README new file mode 100644 index 0000000000..bb113051b2 --- /dev/null +++ b/src/python/README @@ -0,0 +1,53 @@ +# Python Scripts for Merge Conflict Analysis + + +This directory contains Python scripts designed to facilitate the analysis of merge conflicts using various merge tools. The scripts allow users to recreate merges, analyze conflicts, and compare different merge algorithms' effectiveness. + + +## Scripts Overview + + +- `diff3_analysis.py`: This script analyzes merge conflicts for a single specified merge tool and commit. +- `run_diff3_analysis.py`: This script automates the analysis across multiple commits and merge tools, aggregating the results. + + +## Prerequisites + + +- Python 3.x installed on your system. +- Necessary Python packages installed (e.g., `pandas`, `GitPython`). + + +## Usage + + +### Analyzing a Single Merge Conflict + + +To analyze merge conflicts using a specific merge tool for a single commit: + +`python3 diff3_analysis.py <merge_tool> <results_index> <repo_output_dir>` + + +Ex: + +python3 diff3_analysis.py gitmerge_ort 582 ./merge_conflict_analysis_diffs/582/gitmerge_ort + + +`<merge_tool>`: The merge tool to use for the analysis (e.g., gitmerge_ort). +`<results_index>`: The index of the commit in the dataset. +`<repo_output_dir>`: The directory where the analysis results will be saved. + + +### Running Bulk Analysis +To run the analysis over multiple commits and all merge tools: + +`python3 run_diff3_analysis.py --results_index <results_index> --repo_output_dir "<repo_output_dir>"` + + +Ex: + +python3 run_diff3_analysis.py --results_index 582,427,930 --repo_output_dir "./merge_conflict_analysis_diffs" + +`<results_index>`: Comma-separated list of commit indices to analyze. Example: 582,427,930. +`<repo_output_dir>`: The directory where the bulk analysis results will be saved. diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py index 336e1e78ea..5b120a5ecc 100644 --- a/src/python/diff3_analysis.py +++ b/src/python/diff3_analysis.py @@ -1,6 +1,8 @@ """Runs a merge and uses diff3 to compare it to the base and final branch of a given repo. 
""" +import sys +import argparse import subprocess import re import os @@ -142,3 +144,37 @@ def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir): # Optionally, print or log the path of the diff file print(f"Diff results saved to {diff_filename}") + + +def main(merge_tool: str, results_index: int, repo_output_dir: str): + """ + Entry point for the script when run from the command line. + """ + # Convert results_index to int here if using argparse + diff3_analysis(merge_tool, results_index, repo_output_dir) + + +if __name__ == "__main__": + # Use argparse to parse command line arguments + parser = argparse.ArgumentParser( + description="Analyze merge conflicts using the diff3 tool." + ) + parser.add_argument("merge_tool", type=str, help="The merge tool to be used.") + parser.add_argument( + "results_index", + type=int, + help="The index of the repository in the results DataFrame.", + ) + parser.add_argument( + "repo_output_dir", + type=str, + help="The path of where we want to store the results from the analysis.", + ) + + args = parser.parse_args() + + # Ensure the output directory exists + os.makedirs(args.repo_output_dir, exist_ok=True) + + # Call main function with parsed arguments + main(args.merge_tool, args.results_index, args.repo_output_dir) diff --git a/src/python/run_diff3_analysis.py b/src/python/run_diff3_analysis.py index d5ff7d3342..bdb1fc1c58 100644 --- a/src/python/run_diff3_analysis.py +++ b/src/python/run_diff3_analysis.py @@ -1,9 +1,12 @@ -"""Recreates merges on all algorithms with a sample of commits. +"""Recreates merges on selection of algorithms with a selection of commits. 
""" +import sys +import argparse import os from diff3_analysis import diff3_analysis + # Mixed conflict and pass examples from results_greatest_hits/result.csv # Randomly chosen sample of mixed results from dataset row_nums = [ @@ -45,8 +48,9 @@ 900, ] + # All merge tools -merge_tools = [ +all_merge_tools = [ "gitmerge_ort", "gitmerge_ort_adjacent", "gitmerge_ort_ignorespace", @@ -64,21 +68,76 @@ ] -def run_analysis(): +# Default output directory for storing diff .txt files +base_output_dir = "./merge_conflict_analysis_diffs" + + +def run_analysis( + rows=row_nums, merge_tools=all_merge_tools, output_dir=base_output_dir +): """ Analyzes merge conflicts on a sample of repos with all merge algorithms. + Returns: None """ - # Ensure the base output directory exists - base_output_dir = "./merge_conflict_analysis_diffs" - # Loop through each conflict, recreating merges to repo_output_dir - for row_num in row_nums: + for row_num in rows: for merge_tool in merge_tools: # Create a subdirectory for this specific results_index repo_output_dir = os.path.join(base_output_dir, str(row_num), merge_tool) os.makedirs(repo_output_dir, exist_ok=True) + print(merge_tool) + print(row_num) + print(repo_output_dir) diff3_analysis(merge_tool, row_num, repo_output_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Run merge conflict analysis with optional parameters." + ) + + # Make arguments optional and provide default values + parser.add_argument( + "-m", + "--merge_tool", + type=str, + nargs="*", + choices=all_merge_tools, + default=all_merge_tools, + help="Comma-separated list of merge tools to be used. By default, all tools will be used.", + ) + parser.add_argument( + "-i", + "--results_index", + type=str, + default=None, + help="Comma-separated list of indices of repositories in results. 
Default: random list", + ) + parser.add_argument( + "-o", + "--repo_output_dir", + type=str, + default=base_output_dir, + help="Path to store results from analysis. Default: './merge_conflict_analysis_diffs'.", + ) + + args = parser.parse_args() + + # Parse the results_index to list of integers if provided + rows_to_use = ( + [int(index) for index in args.results_index.split(",")] + if args.results_index + else row_nums + ) + + # Merge tools are directly accepted as a list due to nargs='*' + tools_to_use = args.merge_tool + + os.makedirs(args.repo_output_dir, exist_ok=True) + run_analysis( + rows=rows_to_use, merge_tools=tools_to_use, output_dir=args.repo_output_dir + )