From 98095198717618eaaa8f6f21560fd62f74301d1a Mon Sep 17 00:00:00 2001 From: cactusbranch01 Date: Thu, 14 Dec 2023 11:55:31 -0800 Subject: [PATCH] diff3 changes on new dataset --- src/python/diff3_analysis.ipynb | 29 ++++++++++----- src/python/diff3_analysis.py | 58 ++++++++++++++++++----------- src/python/repo.py | 2 +- src/scripts/merge_tools/gitmerge.sh | 1 - 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/python/diff3_analysis.ipynb b/src/python/diff3_analysis.ipynb index ee9f4d11a3..36700392ce 100644 --- a/src/python/diff3_analysis.ipynb +++ b/src/python/diff3_analysis.ipynb @@ -2,30 +2,41 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "dropwizard/metrics : Cloning repo\n", + "dropwizard/metrics : Finished cloning\n", + "dropwizard/metrics : Finished cloning\n", + "229c8236f0764ab62887afccd4dcb4928ed6de5c\n", + "\n", "dropwizard/metrics : Cloning repo\n", "dropwizard/metrics : Finished cloning\n", "dropwizard/metrics : Finished cloning\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "Switched to branch 'TEMP_LEFT_BRANCH'\n", - "diff: ./repos/merge_attempt/dropwizard/metrics/pom.xml: No such file or directory\n", - "diff: ./repos/programmer_merge/dropwizard/metrics/pom.xml: No such file or directory\n" + "ename": "GitCommandError", + "evalue": "Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mGitCommandError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 18\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_adjacent\"\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_ignorespace\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# merge_tool = \"gitmerge_ort_imports\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# merge_tool = \"spork\"\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# merge_tool = \"intellimerge\"\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdiff3_analysis\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m diff3_analysis\n\u001b[0;32m---> 18\u001b[0m \u001b[43mdiff3_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerge_tool\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow_num\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/GitHub/AST-Merging-Ben-Analysis/src/python/diff3_analysis.py:60\u001b[0m, in \u001b[0;36mdiff3_analysis\u001b[0;34m(merge_tool, repo_num)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28mprint\u001b[39m(base_sha\u001b[38;5;241m.\u001b[39mstdout)\n\u001b[1;32m 57\u001b[0m repo \u001b[38;5;241m=\u001b[39m clone_repo_to_path(\n\u001b[1;32m 58\u001b[0m repo_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./repos/base\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 59\u001b[0m ) \u001b[38;5;66;03m# Return a Git-Python repo object\u001b[39;00m\n\u001b[0;32m---> 60\u001b[0m \u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheckout\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_sha\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstdout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m repo\u001b[38;5;241m.\u001b[39msubmodule_update()\n\u001b[1;32m 63\u001b[0m result \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(\n\u001b[1;32m 64\u001b[0m [\n\u001b[1;32m 65\u001b[0m script,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 71\u001b[0m text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 72\u001b[0m )\n", + "File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:736\u001b[0m, in \u001b[0;36mGit.__getattr__..\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 734\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m LazyMixin\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name)\n\u001b[0;32m--> 736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_process\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1316\u001b[0m, in \u001b[0;36mGit._call_process\u001b[0;34m(self, method, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1313\u001b[0m call\u001b[38;5;241m.\u001b[39mappend(dashify(method))\n\u001b[1;32m 1314\u001b[0m call\u001b[38;5;241m.\u001b[39mextend(args_list)\n\u001b[0;32m-> 1316\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcall\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mexec_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda/envs/research/lib/python3.8/site-packages/git/cmd.py:1111\u001b[0m, in \u001b[0;36mGit.execute\u001b[0;34m(self, command, istream, with_extended_output, with_exceptions, as_process, output_stream, stdout_as_string, kill_after_timeout, with_stdout, universal_newlines, shell, env, max_chunk_size, strip_newline_in_stdout, **subprocess_kwargs)\u001b[0m\n\u001b[1;32m 1108\u001b[0m \u001b[38;5;66;03m# END handle debug printing\u001b[39;00m\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m with_exceptions \u001b[38;5;129;01mand\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1111\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m GitCommandError(redacted_command, status, stderr_value, stdout_value)\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(stdout_value, \u001b[38;5;28mbytes\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m stdout_as_string: \u001b[38;5;66;03m# could also be output_stream\u001b[39;00m\n\u001b[1;32m 1114\u001b[0m stdout_value \u001b[38;5;241m=\u001b[39m safe_decode(stdout_value)\n", + "\u001b[0;31mGitCommandError\u001b[0m: Cmd('git') failed due to: exit code(1)\n cmdline: git checkout --force 229c8236f0764ab62887afccd4dcb4928ed6de5c\n\n stderr: 'error: pathspec '229c8236f0764ab62887afccd4dcb4928ed6de5c\n' did not match any file(s) known to git'" ] } ], "source": [ - "repo_num = 548\n", + "row_num = 548\n", "merge_tool = \"gitmerge_ort\"\n", "# merge_tool = \"gitmerge_ort_adjacent\"\n", "# merge_tool = \"gitmerge_ort_ignorespace\"\n", @@ -42,7 +53,7 @@ "# merge_tool = \"intellimerge\"\n", "\n", "from diff3_analysis import diff3_analysis\n", - "diff3_analysis(merge_tool, repo_num)" + "diff3_analysis(merge_tool, row_num)" ] } ], diff --git a/src/python/diff3_analysis.py b/src/python/diff3_analysis.py index 4cc961e6be..ef61265f76 100644 --- a/src/python/diff3_analysis.py +++ b/src/python/diff3_analysis.py @@ -3,6 +3,7 @@ import subprocess import re import os +import shutil import tempfile import pandas as pd from repo import clone_repo_to_path @@ -22,6 +23,8 @@ def diff3_analysis(merge_tool: str, repo_num: int): Returns: None """ + shutil.rmtree("./repos", ignore_errors=True) + df = pd.read_csv("../../results_greatest_hits/result.csv") repo_name = df.iloc[repo_num]["repository"] @@ -38,6 +41,25 @@ def diff3_analysis(merge_tool: str, repo_num: int): repo.submodule_update() repo.git.checkout("-b", "TEMP_RIGHT_BRANCH", force=True) + + base_sha = subprocess.run( + [ + "git", + "merge-base", + "TEMP_LEFT_BRANCH", + "TEMP_RIGHT_BRANCH", + ], + cwd="./repos/merge_attempt/" + repo_name, + stdout=subprocess.PIPE, + text=True, + ) + print(base_sha.stdout) + repo = clone_repo_to_path( + repo_name, "./repos/base" + ) # Return a Git-Python repo object + repo.git.checkout(base_sha.stdout, force=True) + repo.submodule_update() + result = subprocess.run( [ script, @@ -53,19 +75,14 @@ def diff3_analysis(merge_tool: str, repo_num: int): r"CONFLICT \(.+\): Merge conflict in (.+)", result.stdout ) + print(result.stdout) + repo = clone_repo_to_path( repo_name, "./repos/programmer_merge" ) # Return a Git-Python repo object repo.git.checkout(df.iloc[repo_num]["merge"], force=True) repo.submodule_update() - ''' - repo = clone_repo_to_path( - repo_name, "./repos/base" - ) # Return a Git-Python repo object - repo.git.checkout(df.iloc[repo_num]["base"], force=True) - repo.submodule_update() - ''' for conflict_file_match in conflict_file_matches: conflicting_file = str(conflict_file_match) @@ -74,13 +91,11 @@ def diff3_analysis(merge_tool: str, repo_num: int): "./repos/merge_attempt", conflict_path ) - ''' conflict_path_base = os.path.join("./repos/base", conflict_path) - ''' conflict_path_programmer_merge = os.path.join( "./repos/programmer_merge", conflict_path ) - ''' + diff_results = subprocess.run( [ "diff3", @@ -96,17 +111,16 @@ def diff3_analysis(merge_tool: str, repo_num: int): # Check that diff3 didn't run into missing files in the base error_message = "No such file or directory" if error_message in diff_results.stderr: - ''' - # Since the conflict file was added in both parents we can't diff the base. - diff_results = subprocess.run( - [ - "diff", - conflict_path_merge_attempt, - conflict_path_programmer_merge, - ], - stdout=subprocess.PIPE, - text=True, - ) + # Since the conflict file was added in both parents we can't diff the base. + diff_results = subprocess.run( + [ + "diff", + conflict_path_merge_attempt, + conflict_path_programmer_merge, + ], + stdout=subprocess.PIPE, + text=True, + ) # Use a temporary file to store the diff results with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: @@ -126,7 +140,6 @@ def diff3_analysis(merge_tool: str, repo_num: int): stderr=subprocess.PIPE, stdout=subprocess.PIPE, ) - ''' subprocess.run( ["rm", "-rf", "./repos/merge_attempt"], stderr=subprocess.PIPE, @@ -137,3 +150,4 @@ def diff3_analysis(merge_tool: str, repo_num: int): stderr=subprocess.PIPE, stdout=subprocess.PIPE, ) + ''' diff --git a/src/python/repo.py b/src/python/repo.py index 9e2ce64170..91bc307306 100755 --- a/src/python/repo.py +++ b/src/python/repo.py @@ -62,7 +62,7 @@ def clone_repo_to_path(repo_slug: str, path: str) -> git.repo.Repo: Args: repo_slug (str): The slug of the repository, which is "owner/reponame". """ - repo_dir = REPOS_PATH / Path(repo_slug) + repo_dir = Path(path) / Path(repo_slug) if repo_dir.exists(): repo = git.repo.Repo(repo_dir) else: diff --git a/src/scripts/merge_tools/gitmerge.sh b/src/scripts/merge_tools/gitmerge.sh index 14525b86ab..127655fe49 100755 --- a/src/scripts/merge_tools/gitmerge.sh +++ b/src/scripts/merge_tools/gitmerge.sh @@ -33,7 +33,6 @@ retVal=$? # report conflicts if [ $retVal -ne 0 ]; then echo "Conflict" - git merge --abort fi exit $retVal