Skip to content

Commit

Permalink
Remove hardcoding from select_from_results.py (#280)
Browse files Browse the repository at this point in the history
  • Loading branch information
mernst authored Apr 30, 2024
1 parent 7e07031 commit e88e623
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 31 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,6 @@ To run style checking run `make style`.
## Comparing merge algorithms

To investigate differences between two mergers:
* edit file `src/python/select_from_results.py` to reflect the differences you are interested in.
* run `src/python/select_from_results.py` to create a .csv database containing only the differences.
* run `src/python/select_from_results.py` to produce a CSV file containing only the differences you are interested in.
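* redirect that script's standard output to a file (for example, `src/python/select_from_results.py QUERY > CSV_FILE`) to create a .csv database containing only the differences.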
* run `src/python/replay_merge.py --merges_csv CSV_FILE --idx INDEX` (maybe add `-test`) for the index of the merge you are interested in.
85 changes: 56 additions & 29 deletions src/python/select_from_results.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,75 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Output a subset of the results that match a hard-coded condition, to a hard-coded file.
"""Output a subset of the results, to standard out.
The arguments are a query and a list of columns.
The query is executed (to select rows), then columns are output that include:
* idx
* all the columns that appear in the query
* any additional columns specified on the command line.
To change the condition or file, edit this script.
The query is an expression using dataframe variables.
Here are example invocations:
select_from_results.py '(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")'
select_from_results.py '(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")'
"""

import argparse
from os import system
import re
import tempfile
import pandas as pd

df = pd.read_csv("../../results/combined/result.csv", index_col="idx")

# print(df.iloc[3])
# print(df.iloc[3].gitmerge_ort_imports_ignorespace)
# print(df.iloc[3].gitmerge_ort_ignorespace)
# print(
# df.iloc[3].gitmerge_ort_imports_ignorespace == df.iloc[3].gitmerge_ort_ignorespace
# )
def columns_in_query(query):
    """Returns the identifiers used in the query, without duplicates.

    String literals are ignored, as are the keywords ``and``, ``or``,
    ``not``, ``in``, ``is``, ``True``, ``False``, and ``None``.

    Args:
        query: the query expression, as a string.

    Returns:
        A list of identifier names, in order of first appearance.
    """
    # Blank out quoted string literals first, so that words inside a
    # multi-word literal such as "has some words" are not taken for columns.
    without_strings = re.sub(r"""(['"])(?:\\.|(?!\1).)*\1""", " ", query)
    identifiers = re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", without_strings)
    keywords = {"and", "or", "not", "in", "is", "True", "False", "None"}
    # dict.fromkeys removes duplicates while preserving first-seen order.
    # Every occurrence of a keyword is dropped, not just the first one.
    return list(
        dict.fromkeys(name for name in identifiers if name not in keywords)
    )


def is_success(val):
    """Reports whether the given result equals the marker "Tests_passed"."""
    passed = val == "Tests_passed"
    return passed
# Testing:
# columns_in_query('(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")')
# columns_in_query('(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")')


def merge_failed(val):
    """Reports whether the given result equals the marker "Merge_failed"."""
    failed = val == "Merge_failed"
    return failed
def main():
    """Selects rows and columns from results and writes them, as CSV, to standard out.

    Command-line arguments:
      query    An expression passed to ``DataFrame.query`` to select rows.
      --input  The CSV file to read
               (default: ``../../results/combined/result.csv``).
      columns  Any additional column names to include in the output.
    """
    # Local import: standard output is needed only here.
    import sys

    parser = argparse.ArgumentParser(
        prog="select_from_results.py",
        description="Outputs a subset of the results, to standard out",
    )
    parser.add_argument("query")
    parser.add_argument(
        "--input", action="store", default="../../results/combined/result.csv"
    )
    # Todo: Also parse arguments from the query.
    parser.add_argument("columns", nargs=argparse.REMAINDER)
    args = parser.parse_args()

    df = pd.read_csv(args.input)

    # Select some rows.
    df = df.query(args.query)

    # Select some columns.
    # The query has already been evaluated, so an identifier found in it that
    # is not a column (for example, a keyword) is ignored rather than being
    # allowed to raise a KeyError during column selection.
    query_columns = [
        name for name in columns_in_query(args.query) if name in df.columns
    ]
    columns_to_select = (
        ["idx", "repo-idx", "merge-idx", "branch_name", "merge", "left", "right"]
        + query_columns
        + args.columns
    )
    # Drop repeated names (keeping first-seen order) so that no column is
    # output twice.
    columns_to_select = list(dict.fromkeys(columns_to_select))
    df = df[columns_to_select]

    # Write straight to standard out.  A tempfile.TemporaryFile cannot be
    # handed to `cat`: on POSIX its `name` is an integer file descriptor,
    # so building the shell command from it raises TypeError.
    df.to_csv(sys.stdout)


def merge_succeeded(val):
    """Returns true if the given result indicates that the merge succeeded."""
    return val != "Merge_failed"

# Select some rows.
df = df[merge_failed(df.gitmerge_ort) != merge_failed(df.spork)]
# Select some columns (is it OK to omit "idx"??)
df = df[["gitmerge_ort", "spork"]]

df.to_csv("../../results/combined/spork-differs-from-ort.csv", index_label="idx")
if __name__ == "__main__":
main()

0 comments on commit e88e623

Please sign in to comment.