Skip to content

Commit

Permalink
Remove hardcoding from select_from_results.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mernst committed Apr 30, 2024
1 parent 1ee216c commit 76a5dc8
Showing 1 changed file with 54 additions and 29 deletions.
83 changes: 54 additions & 29 deletions src/python/select_from_results.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,73 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Output a subset of the results that match a hard-coded condition, to a hard-coded file.
"""Output a subset of the results, to standard out.
The arguments are a query and a list of columns.
The query is executed (to select rows), then columns are output that include:
* idx
* all the columns that appear in the query
* any additional columns specified on the command line.
To change the condition or file, edit this script.
The query is an expression using dataframe variables.
Here are examples:
(gitmerge_ort == "Merge_failed") and (spork != "Merge_failed")
(gitmerge_ort == "Merge_failed") and (spork == "Merge_failed")
"""

import argparse
from os import system
import pandas as pd
import re
import tempfile

df = pd.read_csv("../../results/combined/result.csv", index_col="idx")

# print(df.iloc[3])
# print(df.iloc[3].gitmerge_ort_imports_ignorespace)
# print(df.iloc[3].gitmerge_ort_ignorespace)
# print(
# df.iloc[3].gitmerge_ort_imports_ignorespace == df.iloc[3].gitmerge_ort_ignorespace
# )
def columns_in_query(query):
"""Returns all the identifiers used in the query."""
result = re.findall(r"""(?<!['"])\b[A-Za-z][A-Za-z_]*\b(?!['"])""", query)
if "and" in result:
result.remove("and")
if "or" in result:
result.remove("or")
return result


def is_success(val):
"""Returns true if the given result is a success result."""
return val == "Tests_passed"
# Testing:
# columns_in_query('(gitmerge_ort == "Merge_failed") && (spork != "Merge_failed")')
# columns_in_query('(gitmerge_ort == "Merge_failed") != (spork == "Merge_failed")')


def merge_failed(val):
"""Returns true if the given result indicates that the merge succeeded."""
return val == "Merge_failed"
def main():
parser = argparse.ArgumentParser(
prog="select_from_results.py",
description="Outputs a subset of the results, to standard out",
)
parser.add_argument("query")
parser.add_argument(
"--input", action="store", default="../../results/combined/result.csv"
)
# Todo: Also parse arguments from the query.
parser.add_argument("columns", nargs=argparse.REMAINDER)
args = parser.parse_args()

df = pd.read_csv(args.input)

def merge_succeeded(val):
"""Returns true if the given result indicates that the merge succeeded."""
return val != "Merge_failed"
# Select some rows.
df = df.query(args.query)

# Select some columns
columns_to_select = (
["idx", "repo-idx", "merge-idx", "branch_name", "merge", "left", "right"]
+ columns_in_query(args.query)
+ args.columns
)
df = df[columns_to_select]

# Retain rows where gitmerge_ort_imports_ignorespace and gitmerge_ort_ignorespace differ.
# df = df[
# merge_failed(df.gitmerge_ort_imports_ignorespace)
# != merge_failed(df.gitmerge_ort_ignorespace)
# ]
# df.to_csv("../../results/combined/imports-differs-from-ort.csv", index_label="idx")
# Gross way to produce output to standard out
tmpfile = tempfile.NamedTemporaryFile()
df.to_csv(tmpfile)
print(tmpfile.name)
system("cat " + tmpfile.name)

# Select some rows.
df = df[merge_failed(df.gitmerge_ort) != merge_failed(df.spork)]
# Select some columns (is it OK to omit "idx"??)
df = df[["gitmerge_ort", "spork"]]

df.to_csv("../../results/combined/spork-differs-from-ort.csv", index_label="idx")
if __name__ == "__main__":
main()

0 comments on commit 76a5dc8

Please sign in to comment.