Skip to content

Commit

Permalink
Fixed scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
benedikt-schesch committed May 21, 2024
1 parent a606032 commit 2290887
Show file tree
Hide file tree
Showing 10 changed files with 410 additions and 44 deletions.
119 changes: 119 additions & 0 deletions results/combined/inconsistent_results.csv

Large diffs are not rendered by default.

105 changes: 78 additions & 27 deletions src/python/latex_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,45 @@
}


def check_fingerprint_consistency(result_df: pd.DataFrame, merge_tools: List[str]):
    """Check that equal merge fingerprints always come with equal results.

    For every pair of compared tools, a row is inconsistent when both tools
    have the same value in their ``<tool>_merge_fingerprint`` column but a
    different value in their ``<tool>`` result column.

    Args:
        result_df: DataFrame containing the results of the merge tools. For
            every compared tool ``t`` it must provide the columns ``t`` and
            ``t + "_merge_fingerprint"``.
        merge_tools: list of merge tools

    Raises:
        AssertionError: if any compared pair of tools has at least one
            inconsistent row. It is raised explicitly instead of through an
            ``assert`` statement so the check is not dropped under
            ``python -O``.
    """
    # Tools that are excluded from the consistency check.
    ignored_tools = ("gitmerge_resolve", "gitmerge_ort_adjacent")
    compared_tools = [tool for tool in merge_tools if tool not in ignored_tools]

    # The inconsistency mask is symmetric in the two tools, so each unordered
    # pair only has to be examined once.
    for idx, merge_tool1 in enumerate(compared_tools):
        for merge_tool2 in compared_tools[idx + 1 :]:
            if merge_tool1 == merge_tool2:
                # A tool listed twice is never compared with itself.
                continue

            # Check if fingerprints are the same
            same_fingerprint_mask = (
                result_df[merge_tool1 + "_merge_fingerprint"]
                == result_df[merge_tool2 + "_merge_fingerprint"]
            )

            # Check if results are the same
            same_result_mask = result_df[merge_tool1] == result_df[merge_tool2]

            # Same fingerprint but different result
            inconsistent_mask = same_fingerprint_mask & ~same_result_mask
            num_inconsistent = int(inconsistent_mask.sum())
            if num_inconsistent > 0:
                message = (
                    f"Inconsistency found between {merge_tool1} and "
                    f"{merge_tool2} in {num_inconsistent} cases."
                )
                logger.error(message)
                logger.error(result_df[inconsistent_mask])
                raise AssertionError(message)


def merge_tool_latex_name(name: str) -> str:
"""Return the LaTeX name of a merge tool.
Args:
Expand Down Expand Up @@ -385,44 +424,56 @@ def merge_two_states(
Path(plots_output_path).mkdir(parents=True, exist_ok=True)
Path(tables_output_path).mkdir(parents=True, exist_ok=True)

check_fingerprint_consistency(result_df, merge_tools)

# Figure Heat map diffing
result = np.zeros((len(merge_tools), len(merge_tools)))
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
) as progress:
task = progress.add_task("Processing merges...", total=len(result_df))
for _, row in result_df.iterrows():
progress.update(task, advance=1)
for idx, merge_tool1 in enumerate(merge_tools):
for idx2, merge_tool2 in enumerate(merge_tools[(idx + 1) :]):
if (
row[merge_tool1] in MERGE_CORRECT_NAMES
or row[merge_tool2] in MERGE_CORRECT_NAMES
):
if (
row[merge_tool1 + "_merge_fingerprint"]
!= row[merge_tool2 + "_merge_fingerprint"]
):
result[idx][idx2 + idx + 1] += 1
result[idx2 + idx + 1][idx] += 1
result = pd.DataFrame(
{
merge_tool: {merge_tool: 0 for merge_tool in merge_tools}
for merge_tool in merge_tools
}
)
for merge_tool1 in merge_tools:
for merge_tool2 in merge_tools:
# Mask for different fingerprints
mask_diff_fingerprint = (
result_df[merge_tool1 + "_merge_fingerprint"]
!= result_df[merge_tool2 + "_merge_fingerprint"]
)

# Mask if one of the results is in correct or incorrect names
merge_name_flags1 = result_df[merge_tool1].isin(
MERGE_CORRECT_NAMES + MERGE_INCORRECT_NAMES
)
merge_name_flags2 = result_df[merge_tool2].isin(
MERGE_CORRECT_NAMES + MERGE_INCORRECT_NAMES
)
mask_merge_name = merge_name_flags1 | merge_name_flags2

if merge_tool1 == "intellimerge" and merge_tool2 == "gitmerge_ort":
print(mask_diff_fingerprint)
print(mask_merge_name)
print((mask_diff_fingerprint & mask_merge_name).sum())

# Calculate the result
result.loc[merge_tool1, merge_tool2] = (
mask_diff_fingerprint & mask_merge_name
).sum()

# Transform the result into a numpy array
_, ax = plt.subplots(figsize=(8, 6))
result = np.tril(result)
result_array = np.tril(result.to_numpy())
latex_merge_tool = [
"\\mbox{" + merge_tool_latex_name(i) + "}" for i in merge_tools
"\\mbox{" + merge_tool_latex_name(i) + "}" for i in result.columns
]
with warnings.catch_warnings():
warnings.simplefilter("ignore")
heatmap = sns.heatmap(
result / len(result_df),
result_array,
annot=True,
ax=ax,
xticklabels=latex_merge_tool, # type: ignore
yticklabels=latex_merge_tool, # type: ignore
fmt=".3f",
mask=np.triu(np.ones_like(result, dtype=bool), k=1),
cmap="Blues",
annot_kws={"size": 6},
Expand Down
20 changes: 16 additions & 4 deletions src/python/replay_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def merge_replay(
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["left"])
repo.checkout(merge_data["left"], use_cache=False)

workdir = Path(f"{repo_slug}-merge-input-right")
if not (WORKDIR_DIRECTORY / workdir).exists():
Expand All @@ -114,7 +114,7 @@ def merge_replay(
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["right"])
repo.checkout(merge_data["right"], use_cache=False)

workdir = Path(f"{repo_slug}-merge-input-base")
if not (WORKDIR_DIRECTORY / workdir).exists():
Expand All @@ -129,7 +129,7 @@ def merge_replay(
["git", "merge-base", merge_data["left"], merge_data["right"]],
stdout=subprocess.PIPE,
).stdout.decode("utf-8")
repo.checkout(base_commit)
repo.checkout(base_commit, use_cache=False)

workdir = Path(f"{repo_slug}-merge-input-programmer")
if not (WORKDIR_DIRECTORY / workdir).exists():
Expand All @@ -140,7 +140,7 @@ def merge_replay(
delete_workdir=False,
lazy_clone=False,
)
repo.checkout(merge_data["merge"])
repo.checkout(merge_data["merge"], use_cache=False)

for merge_tool in MERGE_TOOL:
progress.update(task, advance=1)
Expand Down Expand Up @@ -201,6 +201,18 @@ def merge_replay(
use_cache=False,
)
assert repo.local_repo_path.exists()
if merge_result in (MERGE_STATE.Merge_failed, MERGE_STATE.Merge_success):
# Run 'git diff --name-only --diff-filter=U' to get the files with conflicts
conflict_files = subprocess.run(
["git", "diff", "--name-only", "--diff-filter=U"],
cwd=repo.local_repo_path,
stdout=subprocess.PIPE,
).stdout.decode("utf-8")
is_conflict = len(conflict_files) > 0
assert (
is_conflict == (merge_result == MERGE_STATE.Merge_failed)
), f"merge_replay: merge_result {merge_result} does not match conflict_files {conflict_files}"

root_dir = Path("replay_logs")
log_path = root_dir / Path(
"merges/"
Expand Down
107 changes: 107 additions & 0 deletions src/python/utils/build_inconsistent_merges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""Check for inconsistent results between merge tools and output them to a CSV.
usage: python3 build_inconsistent_merges.py --result_csv <path_to_result_csv> --output_csv <path_to_output_csv>
This script reads a result CSV file, checks for inconsistencies between merge tools based on fingerprints,
and writes the inconsistent results to a new CSV file.
Arguments:
- result_csv: path to the result CSV file
- output_csv: path to the output CSV file where inconsistent results will be saved
"""

import argparse
import pandas as pd
from typing import List


def check_fingerprint_consistency(
    result_df: pd.DataFrame, merge_tools: List[str]
) -> pd.DataFrame:
    """Check if the fingerprints are consistent and return inconsistent results.

    A row is inconsistent for a pair of tools when both tools have the same
    value in their ``<tool>_merge_fingerprint`` column but a different value
    in their ``<tool>`` result column.

    Args:
        result_df: DataFrame containing the results of the merge tools. For
            every compared tool ``t`` it must provide the columns ``t`` and
            ``t + "_merge_fingerprint"``.
        merge_tools: list of merge tools

    Returns:
        DataFrame with the rows of ``result_df`` that are inconsistent for at
        least one compared pair of tools, with duplicate rows dropped. The
        rows carry no indication of which pair disagreed. An empty DataFrame
        without columns is returned when nothing is inconsistent.
    """
    # Tools that are excluded from the consistency check.
    ignored_tools = ("gitmerge_resolve", "gitmerge_ort_adjacent")
    compared_tools = [tool for tool in merge_tools if tool not in ignored_tools]

    inconsistencies = []
    # The inconsistency mask is symmetric in the two tools, so visiting each
    # unordered pair once selects the same rows as visiting both orderings
    # and de-duplicating afterwards.
    for idx, merge_tool1 in enumerate(compared_tools):
        for merge_tool2 in compared_tools[idx + 1 :]:
            if merge_tool1 == merge_tool2:
                # A tool listed twice is never compared with itself.
                continue

            # Check if fingerprints are the same
            same_fingerprint_mask = (
                result_df[merge_tool1 + "_merge_fingerprint"]
                == result_df[merge_tool2 + "_merge_fingerprint"]
            )

            # Check if results are the same
            same_result_mask = result_df[merge_tool1] == result_df[merge_tool2]

            # Same fingerprint but different result
            inconsistent_mask = same_fingerprint_mask & ~same_result_mask
            if inconsistent_mask.any():
                inconsistencies.append(result_df[inconsistent_mask])

    if not inconsistencies:
        return pd.DataFrame()
    # The same row can be inconsistent for several pairs; keep it only once.
    return pd.concat(inconsistencies).drop_duplicates()


def main():
    """Find inconsistent rows in a result CSV and save them to another CSV.

    Reads the CSV given by ``--result_csv`` (indexed by its ``idx`` column),
    derives the merge tool names from the columns ending in
    ``_merge_fingerprint``, and runs the fingerprint consistency check. If
    inconsistent rows are found they are written to ``--output_csv``;
    otherwise only a message is printed and no file is written.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--result_csv",
        type=str,
        default="results/combined/result.csv",
        help="Path to the result CSV file",
    )
    arg_parser.add_argument(
        "--output_csv",
        type=str,
        default="results/combined/inconsistent_results.csv",
        help="Path to the output CSV file for inconsistent results",
    )
    cli_args = arg_parser.parse_args()

    result_df = pd.read_csv(cli_args.result_csv, index_col="idx")

    # Every "<tool>_merge_fingerprint" column identifies one merge tool.
    fingerprint_suffix = "_merge_fingerprint"
    merge_tools = [
        column.split(fingerprint_suffix)[0]
        for column in result_df.columns
        if column.endswith(fingerprint_suffix)
    ]

    inconsistent_results = check_fingerprint_consistency(result_df, merge_tools)

    if inconsistent_results.empty:
        print("No inconsistencies found.")
        return

    inconsistent_results.to_csv(cli_args.output_csv, index_label="idx")
    print(f"Inconsistent results have been saved to {cli_args.output_csv}")
    print("Number of inconsistencies found:", len(inconsistent_results))


if __name__ == "__main__":
    main()
File renamed without changes.
62 changes: 62 additions & 0 deletions src/python/utils/delete_intellimerge_keys_from_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""Delete the keys containing 'intellimerge' from the JSON files in the cache directory."""

import os
import json


def count_import_keys(directory):
    """Count the keys containing 'intellimerge' in the JSON files of a directory.

    The directory is walked recursively and every file whose name ends in
    ``.json`` is loaded. Nothing is modified.

    Args:
        directory: root directory to search for JSON files.

    Returns:
        Total number of matching keys over all JSON files found.
    """
    # NOTE(review): assumes each JSON file holds an object with string keys.
    count = 0
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".json"):
                continue
            json_path = os.path.join(root, file_name)
            with open(json_path, "r", encoding="utf-8") as handle:
                data = json.load(handle)
            count += sum(1 for key in data if "intellimerge" in key)
    return count


def delete_import_keys(directory):
    """Delete the keys containing 'intellimerge' from the JSON files of a directory.

    The directory is walked recursively and every file whose name ends in
    ``.json`` is loaded. Files that contain matching keys are rewritten in
    place, without those keys, using a 4-space indent.

    Args:
        directory: root directory to search for JSON files.

    Returns:
        Total number of keys deleted over all JSON files found.
    """
    # NOTE(review): assumes each JSON file holds an object with string keys.
    total_deleted = 0
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".json"):
                continue
            file_path = os.path.join(root, file_name)
            with open(file_path, "r", encoding="utf-8") as source:
                data = json.load(source)

            # Collect the keys first: a dict must not change size while it
            # is being iterated.
            doomed_keys = [key for key in data.keys() if "intellimerge" in key]
            if not doomed_keys:
                continue
            for key in doomed_keys:
                del data[key]
            total_deleted += len(doomed_keys)

            # Save the modified data back to file
            with open(file_path, "w", encoding="utf-8") as target:
                json.dump(data, target, indent=4)

    return total_deleted


def main():
    """Interactively delete the 'intellimerge' keys from the ``cache`` directory.

    Prints how many keys would be deleted, asks for confirmation on standard
    input, and deletes the keys only when the answer is "yes" in any letter
    case.
    """
    directory = "cache"
    potential_deletions = count_import_keys(directory)
    print(f"Potential deletions: {potential_deletions}")

    answer = input("Do you want to proceed with deleting these keys? (yes/no): ")
    if answer.lower() != "yes":
        print("Operation cancelled.")
        return

    total_deleted = delete_import_keys(directory)
    print(f"Total keys deleted: {total_deleted}")


if __name__ == "__main__":
    main()
13 changes: 10 additions & 3 deletions src/scripts/merge_tools/gitmerge_ort_imports.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,16 @@ if "$MERGE_SCRIPTS_DIR"/gitmerge.sh "$clone_dir" "$branch1" "$branch2" "$strateg
fi

cd "$clone_dir" || exit 1
if ! "$MERGE_SCRIPTS_DIR"/resolve-import-conflicts; then
echo "Conflict"
exit 1
"$MERGE_SCRIPTS_DIR"/resolve-import-conflicts;

# Detect conflicts using Git commands
conflict_files=$(git diff --name-only --diff-filter=U)

if [ -n "$conflict_files" ]; then
echo "Conflict detected in the following files:"
echo "$conflict_files"
exit 1
fi

echo "No conflicts detected."
exit 0
13 changes: 10 additions & 3 deletions src/scripts/merge_tools/gitmerge_ort_imports_ignorespace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,16 @@ if "$MERGE_SCRIPTS_DIR"/gitmerge.sh "$clone_dir" "$branch1" "$branch2" "$strateg
fi

cd "$clone_dir" || exit 1
if ! "$MERGE_SCRIPTS_DIR"/resolve-import-conflicts; then
echo "Conflict"
exit 1
"$MERGE_SCRIPTS_DIR"/resolve-import-conflicts;

# Detect conflicts using Git commands
conflict_files=$(git diff --name-only --diff-filter=U)

if [ -n "$conflict_files" ]; then
echo "Conflict detected in the following files:"
echo "$conflict_files"
exit 1
fi

echo "No conflicts detected."
exit 0
Loading

0 comments on commit 2290887

Please sign in to comment.