Added command line run analysis arguments

Added the ability to run the analysis from the command line instead of from a Jupyter notebook
cactusbranch01 · Mar 22, 2024 · f35269d · f35269d
1 parent 4e7771b
commit f35269d
Show file tree

Hide file tree

Showing 3 changed files with 155 additions and 7 deletions.
diff --git a/src/python/README b/src/python/README
@@ -0,0 +1,53 @@
+# Python Scripts for Merge Conflict Analysis
+
+
+This directory contains Python scripts designed to facilitate the analysis of merge conflicts using various merge tools. The scripts allow users to recreate merges, analyze conflicts, and compare different merge algorithms' effectiveness.
+
+
+## Scripts Overview
+
+
+- `diff3_analysis.py`: This script analyzes merge conflicts for a single specified merge tool and commit.
+- `run_diff3_analysis.py`: This script automates the analysis across multiple commits and merge tools, aggregating the results.
+
+
+## Prerequisites
+
+
+- Python 3.x installed on your system.
+- Necessary Python packages installed (e.g., `pandas`, `GitPython`).
+
+
+## Usage
+
+
+### Analyzing a Single Merge Conflict
+
+
+To analyze merge conflicts using a specific merge tool for a single commit:
+
+    python3 diff3_analysis.py <merge_tool> <results_index> <output_directory>
+
+
+Ex:
+
+    python3 diff3_analysis.py gitmerge_ort 582 ./merge_conflict_analysis_diffs/582/gitmerge_ort
+
+
+- `<merge_tool>`: The merge tool to use for the analysis (e.g., `gitmerge_ort`).
+- `<results_index>`: The index of the commit in the dataset.
+- `<output_directory>`: The directory where the analysis results will be saved.
+
+
+### Running Bulk Analysis
+To run the analysis over multiple commits and all merge tools:
+
+    python3 run_diff3_analysis.py --results_index <indexes> --repo_output_dir "<output_directory>"
+
+
+Ex:
+
+    python3 run_diff3_analysis.py --results_index 582,427,930 --repo_output_dir "./merge_conflict_analysis_diffs"
+
+- `<indexes>`: Comma-separated list of commit indices to analyze. Example: `582,427,930`.
+- `<output_directory>`: The directory where the bulk analysis results will be saved.
diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py
@@ -1,6 +1,8 @@
 """Runs a merge and uses diff3 to compare it to the base and final branch of a given repo.
 """
 
+import sys
+import argparse
 import subprocess
 import re
 import os
@@ -142,3 +144,37 @@ def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir):
 
         # Optionally, print or log the path of the diff file
         print(f"Diff results saved to {diff_filename}")
+
+
+def main(merge_tool: str, results_index: int, repo_output_dir: str):
+    """
+    Entry point for the script when run from the command line.
+    """
+    # No cast needed: argparse already yields results_index as int (type=int)
+    diff3_analysis(merge_tool, results_index, repo_output_dir)
+
+
+if __name__ == "__main__":
+    # Parse three required positionals: merge_tool, results_index, repo_output_dir
+    parser = argparse.ArgumentParser(
+        description="Analyze merge conflicts using the diff3 tool."
+    )
+    parser.add_argument("merge_tool", type=str, help="The merge tool to be used.")
+    parser.add_argument(
+        "results_index",
+        type=int,
+        help="The index of the repository in the results DataFrame.",
+    )
+    parser.add_argument(
+        "repo_output_dir",
+        type=str,
+        help="The path of where we want to store the results from the analysis.",
+    )
+
+    args = parser.parse_args()
+
+    # Create the output directory (and parents); no error if it already exists
+    os.makedirs(args.repo_output_dir, exist_ok=True)
+
+    # Hand the parsed values to main; results_index is already an int here
+    main(args.merge_tool, args.results_index, args.repo_output_dir)
diff --git a/src/python/run_diff3_analysis.py b/src/python/run_diff3_analysis.py
@@ -1,9 +1,12 @@
-"""Recreates merges on all algorithms with a sample of commits.
+"""Recreates merges on a selection of algorithms with a selection of commits.
 """
 
+import sys
+import argparse
 import os
 from diff3_analysis import diff3_analysis
 
+
 # Mixed conflict and pass examples from results_greatest_hits/result.csv
 # Randomly chosen sample of mixed results from dataset
 row_nums = [
@@ -45,8 +48,9 @@
     900,
 ]
 
+
 # All merge tools
-merge_tools = [
+all_merge_tools = [
     "gitmerge_ort",
     "gitmerge_ort_adjacent",
     "gitmerge_ort_ignorespace",
@@ -64,21 +68,76 @@
 ]
 
 
-def run_analysis():
+# Default output directory for storing diff .txt files
+base_output_dir = "./merge_conflict_analysis_diffs"
+
+
+def run_analysis(
+    rows=row_nums, merge_tools=all_merge_tools, output_dir=base_output_dir
+):
     """
-    Analyzes merge conflicts on a sample of repos with all merge algorithms.
+    Analyzes merge conflicts for the given rows with the given merge tools.
 
+    Args:
+        rows: Result indices to analyze. Defaults to row_nums.
+        merge_tools: Merge tool names to run on every row. Defaults to
+            all_merge_tools.
+        output_dir: Root directory. For each (row, tool) pair the directory
+            <output_dir>/<row>/<merge_tool> is created and passed to
+            diff3_analysis. Defaults to base_output_dir.
+
     Returns:
         None
     """
 
-    # Ensure the base output directory exists
-    base_output_dir = "./merge_conflict_analysis_diffs"
-
     # Loop through each conflict, recreating merges to repo_output_dir
-    for row_num in row_nums:
+    for row_num in rows:
         for merge_tool in merge_tools:
             # Create a subdirectory for this specific results_index
-            repo_output_dir = os.path.join(base_output_dir, str(row_num), merge_tool)
+            # under the caller-supplied output_dir, not the module default,
+            # so that a custom --repo_output_dir is actually honored
+            repo_output_dir = os.path.join(output_dir, str(row_num), merge_tool)
             os.makedirs(repo_output_dir, exist_ok=True)
+            print(merge_tool)
+            print(row_num)
+            print(repo_output_dir)
             diff3_analysis(merge_tool, row_num, repo_output_dir)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Run merge conflict analysis with optional parameters."
+    )
+
+    # Every argument is optional; omitted ones fall back to module defaults
+    parser.add_argument(
+        "-m",
+        "--merge_tool",
+        type=str,
+        nargs="*",
+        choices=all_merge_tools,
+        default=all_merge_tools,
+        help="Space-separated list of merge tools to be used. By default, all tools will be used.",
+    )
+    parser.add_argument(
+        "-i",
+        "--results_index",
+        type=str,
+        default=None,
+        help="Comma-separated list of indices of repositories in results. Default: built-in sample list",
+    )
+    parser.add_argument(
+        "-o",
+        "--repo_output_dir",
+        type=str,
+        default=base_output_dir,
+        help="Path to store results from analysis. Default: './merge_conflict_analysis_diffs'.",
+    )
+
+    args = parser.parse_args()
+
+    # Split the comma-separated string into ints; use row_nums when not given
+    rows_to_use = (
+        [int(index) for index in args.results_index.split(",")]
+        if args.results_index
+        else row_nums
+    )
+
+    # nargs='*' already yields a list of whitespace-separated tool names
+    tools_to_use = args.merge_tool
+
+    os.makedirs(args.repo_output_dir, exist_ok=True)
+    run_analysis(
+        rows=rows_to_use, merge_tools=tools_to_use, output_dir=args.repo_output_dir
+    )