Skip to content

Commit

Permalink
Added command line run analysis arguments
Browse files Browse the repository at this point in the history
Created functionality for running the analysis in the command line instead of using a Jupyter nb
  • Loading branch information
cactusbranch01 committed Mar 22, 2024
1 parent 4e7771b commit f35269d
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 7 deletions.
53 changes: 53 additions & 0 deletions src/python/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Python Scripts for Merge Conflict Analysis


This directory contains Python scripts designed to facilitate the analysis of merge conflicts using various merge tools. The scripts allow users to recreate merges, analyze conflicts, and compare different merge algorithms' effectiveness.


## Scripts Overview


- `diff3_analysis.py`: This script analyzes merge conflicts for a single specified merge tool and commit.
- `run_diff3_analysis.py`: This script automates the analysis across multiple commits and merge tools, aggregating the results.


## Prerequisites


- Python 3.x installed on your system.
- Necessary Python packages installed (e.g., `pandas`, `GitPython`).


## Usage


### Analyzing a Single Merge Conflict


To analyze merge conflicts using a specific merge tool for a single commit:

python3 diff3_analysis.py <merge_tool> <results_index> <output_directory>


Example:

python3 diff3_analysis.py gitmerge_ort 582 ./merge_conflict_analysis_diffs/582/gitmerge_ort


Arguments:

- `<merge_tool>`: The merge tool to use for the analysis (e.g., `gitmerge_ort`).
- `<results_index>`: The index of the commit in the dataset.
- `<output_directory>`: The directory where the analysis results will be saved.


### Running Bulk Analysis

To run the analysis over multiple commits and all merge tools:

python3 run_diff3_analysis.py --results_index <indexes> --repo_output_dir "<output_directory>"


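Example:

python3 run_diff3_analysis.py --results_index 582,427,930 --repo_output_dir "./merge_conflict_analysis_diffs"

Arguments:

- `<indexes>`: Comma-separated list of commit indices to analyze. Example: `582,427,930`.
- `<output_directory>`: The directory where the bulk analysis results will be saved.

By default every merge tool is run; pass `--merge_tool` followed by tool names to restrict the analysis to those tools.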
36 changes: 36 additions & 0 deletions src/python/diff3_analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Runs a merge and uses diff3 to compare it to the base and final branch of a given repo.
"""

import sys
import argparse
import subprocess
import re
import os
Expand Down Expand Up @@ -142,3 +144,37 @@ def diff3_analysis(merge_tool: str, results_index: int, repo_output_dir):

# Optionally, print or log the path of the diff file
print(f"Diff results saved to {diff_filename}")


def main(merge_tool: str, results_index: int, repo_output_dir: str):
    """Run the diff3 analysis for one merge tool and one results index.

    Thin command-line entry point: the arguments are forwarded unchanged to
    ``diff3_analysis``.

    Args:
        merge_tool: The merge tool to be used.
        results_index: The index of the repository in the results DataFrame.
        repo_output_dir: Path where the results of the analysis are stored.
    """
    diff3_analysis(
        merge_tool=merge_tool,
        results_index=results_index,
        repo_output_dir=repo_output_dir,
    )


if __name__ == "__main__":
    # Command-line interface: three required positional arguments.
    cli = argparse.ArgumentParser(
        description="Analyze merge conflicts using the diff3 tool."
    )
    cli.add_argument("merge_tool", type=str, help="The merge tool to be used.")
    cli.add_argument(
        "results_index",
        type=int,
        help="The index of the repository in the results DataFrame.",
    )
    cli.add_argument(
        "repo_output_dir",
        type=str,
        help="The path of where we want to store the results from the analysis.",
    )
    options = cli.parse_args()

    # Create the output directory before the analysis runs.
    output_dir = options.repo_output_dir
    os.makedirs(output_dir, exist_ok=True)

    # Hand the parsed values to the script entry point.
    main(options.merge_tool, options.results_index, output_dir)
73 changes: 66 additions & 7 deletions src/python/run_diff3_analysis.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Recreates merges on all algorithms with a sample of commits.
"""Recreates merges on selection of algorithms with a selection of commits.
"""

import sys
import argparse
import os
from diff3_analysis import diff3_analysis


# Mixed conflict and pass examples from results_greatest_hits/result.csv
# Randomly chosen sample of mixed results from dataset
row_nums = [
Expand Down Expand Up @@ -45,8 +48,9 @@
900,
]


# All merge tools
merge_tools = [
all_merge_tools = [
"gitmerge_ort",
"gitmerge_ort_adjacent",
"gitmerge_ort_ignorespace",
Expand All @@ -64,21 +68,76 @@
]


def run_analysis():
# Default output directory for storing diff .txt files
base_output_dir = "./merge_conflict_analysis_diffs"


def run_analysis(
    rows=row_nums, merge_tools=all_merge_tools, output_dir=base_output_dir
):
    """Recreate merges for every (row, merge tool) pair and save the diffs.

    For each pair, the subdirectory ``<output_dir>/<row>/<merge_tool>`` is
    created and ``diff3_analysis`` is run with it as the output location.

    Args:
        rows: Indices into the results dataset to analyze.
        merge_tools: Names of the merge tools to run on each row.
        output_dir: Base directory under which the per-row, per-tool
            subdirectories are created.

    Returns:
        None
    """
    for row_num in rows:
        for merge_tool in merge_tools:
            # Build the path from the caller-supplied output_dir rather than
            # the module-level default, so a custom directory is honored.
            repo_output_dir = os.path.join(output_dir, str(row_num), merge_tool)
            os.makedirs(repo_output_dir, exist_ok=True)
            print(merge_tool)
            print(row_num)
            print(repo_output_dir)
            diff3_analysis(merge_tool, row_num, repo_output_dir)


def parse_results_index(raw: str):
    """Convert a comma-separated string of indices into a list of ints.

    Whitespace around entries is ignored and empty entries (for example the
    one produced by a trailing comma) are skipped.

    Args:
        raw: Text such as ``"582,427,930"``.

    Returns:
        The indices as a list of ints, in the order given.

    Raises:
        ValueError: If a non-empty entry is not a valid integer.
    """
    return [int(token) for token in raw.split(",") if token.strip()]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run merge conflict analysis with optional parameters."
    )

    # Every argument is optional and falls back to a module-level default.
    parser.add_argument(
        "-m",
        "--merge_tool",
        type=str,
        nargs="*",
        choices=all_merge_tools,
        default=all_merge_tools,
        # nargs="*" consumes whitespace-separated values, not a comma list.
        help="Space-separated list of merge tools to be used. By default, all tools will be used.",
    )
    parser.add_argument(
        "-i",
        "--results_index",
        type=str,
        default=None,
        help="Comma-separated list of indices of repositories in results. Default: random list",
    )
    parser.add_argument(
        "-o",
        "--repo_output_dir",
        type=str,
        default=base_output_dir,
        help="Path to store results from analysis. Default: './merge_conflict_analysis_diffs'.",
    )

    args = parser.parse_args()

    # Use the built-in sample of rows unless indices were given explicitly.
    if args.results_index:
        try:
            rows_to_use = parse_results_index(args.results_index)
        except ValueError:
            # Report malformed input as a usage error instead of a traceback.
            parser.error(
                "--results_index must be a comma-separated list of integers, "
                f"got {args.results_index!r}"
            )
        if not rows_to_use:
            parser.error("--results_index must contain at least one index")
    else:
        rows_to_use = row_nums

    # Merge tools are directly accepted as a list due to nargs='*'
    tools_to_use = args.merge_tool

    os.makedirs(args.repo_output_dir, exist_ok=True)
    run_analysis(
        rows=rows_to_use, merge_tools=tools_to_use, output_dir=args.repo_output_dir
    )

0 comments on commit f35269d

Please sign in to comment.