From 94785ab3b36be463eef6c8ac06e5f6dc190fbcf6 Mon Sep 17 00:00:00 2001 From: Benedikt Date: Mon, 23 Sep 2024 16:32:44 +0200 Subject: [PATCH] Create two separate files --- .github/workflows/check-reproducibility.yml | 2 +- Makefile | 6 +++--- README.md | 8 ++++---- src/python/delete_cache_entry_of_merge_tests.py | 2 +- src/python/delete_tests_with_different_output.py | 2 +- src/python/latex_output.py | 7 +++---- src/python/replay_merge.py | 2 +- src/python/utils/build_inconsistent_merges.py | 2 +- src/python/utils/select_from_results.py | 2 +- 9 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.github/workflows/check-reproducibility.yml b/.github/workflows/check-reproducibility.yml index dd853ec404..5311acb8d9 100644 --- a/.github/workflows/check-reproducibility.yml +++ b/.github/workflows/check-reproducibility.yml @@ -47,7 +47,7 @@ jobs: uses: jlumbroso/free-disk-space@main - name: make check-merges-reproducibility run: | - head -n 151 results/combined/result.csv > temp.csv && mv temp.csv results/combined/result.csv + head -n 151 results/combined/result_raw.csv > temp.csv && mv temp.csv results/combined/result_raw.csv make check-merges-reproducibility env: GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }} diff --git a/Makefile b/Makefile index db0f48e63a..3e35b4e824 100644 --- a/Makefile +++ b/Makefile @@ -8,9 +8,9 @@ SH_SCRIPTS = $(shell grep --exclude-dir=build --exclude-dir=repos --exclude-di BASH_SCRIPTS = $(shell grep --exclude-dir=build --exclude-dir=repos --exclude-dir=cache -r -l '^\#! \?\(/bin/\|/usr/bin/env \)bash' * | grep -v /.git/ | grep -v '~$$' | grep -v '\.tar$$' | grep -v gradlew) PYTHON_FILES = $(shell find . -name '*.py' ! -path './repos/*' -not -path "./.workdir/*" -not -path "./cache*/*" | grep -v '/__pycache__/' | grep -v '/.git/' | grep -v gradlew | grep -v git-hires-merge) -CSV_RESULTS_COMBINED = results/combined/result.csv -CSV_RESULTS_GREATEST_HITS = results/greatest_hits/result.csv -CSV_RESULTS_REAPER = results/reaper/result.csv +CSV_RESULTS_COMBINED = results/combined/result_raw.csv +CSV_RESULTS_GREATEST_HITS = results/greatest_hits/result_raw.csv +CSV_RESULTS_REAPER = results/reaper/result_raw.csv CSV_RESULTS = $(CSV_RESULTS_COMBINED) NUM_PROCESSES = 0 diff --git a/README.md b/README.md index 24eb355d48..937bb221a9 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ make small-test This runs the entire code on two small repos. The output data appears in `results/small/`. -* `results/small/result.csv`: the final result +* `results/small/result_adjusted.csv`: the final result * `results/small/merges/` contains all the merges. @@ -79,7 +79,7 @@ The output data appears in `results/small/`. * `results/small/merges_tested/` contains all merges that have been tested. -* `results/small/result.csv` contains the final result. +* `results/small/result_adjusted.csv` contains the final result. ### Perform full analysis @@ -97,7 +97,7 @@ To run the stack on all repos and also diff the merges' outputs: This will run the entire code on all the repos and automatically decompress the cache if `cache/` does not exist. All the output data can be found in `results/`. -The final result is found in `results/result.csv`. +The final result is found in `results/result_adjusted.csv`. Directory `results/merges` contains all the merges for each repo. Directory `results/merges_tested` contains all the merges that have been tested. @@ -248,7 +248,7 @@ To investigate differences between two mergers: * Run `src/python/utils/select_from_results.py` to create a .csv database containing only the differences. * Set `DELETE_WORKDIRS` to `false` in `src/python/variables.py`. * Run `src/python/replay_merge.py --idx INDEX` (maybe add `-test`) for the index of the merge you are interested in. - If the merge is in the small test, you may need to add `--merges_csv ./test/small-goal-files/result.csv`. + If the merge is in the small test, you may need to add `--merges_csv ./test/small-goal-files/result_adjusted.csv`. ## Overwriting results manually diff --git a/src/python/delete_cache_entry_of_merge_tests.py b/src/python/delete_cache_entry_of_merge_tests.py index 2cc1556322..302d74e30b 100644 --- a/src/python/delete_cache_entry_of_merge_tests.py +++ b/src/python/delete_cache_entry_of_merge_tests.py @@ -38,7 +38,7 @@ def delete_row(row): parser.add_argument( "--result", type=str, - default="results/combined/result.csv", + default="results/combined/result_adjusted.csv", help="The result csv file.", ) parser.add_argument( diff --git a/src/python/delete_tests_with_different_output.py b/src/python/delete_tests_with_different_output.py index f433ea0224..aacf79707f 100644 --- a/src/python/delete_tests_with_different_output.py +++ b/src/python/delete_tests_with_different_output.py @@ -14,7 +14,7 @@ def main(): parser.add_argument( "--result", type=str, - default="results/combined/result.csv", + default="results/combined/result_adjusted.csv", help="The result csv file.", ) rows_affected = 0 diff --git a/src/python/latex_output.py b/src/python/latex_output.py index 5d6ab6610e..c8bdc540b4 100755 --- a/src/python/latex_output.py +++ b/src/python/latex_output.py @@ -495,9 +495,9 @@ def main(): print(f"CSV saved to: {csv_filename}") print(f"Rows: {len(filtered_df)}") + + result_df.to_csv(args.output_dir / "result_raw.csv", index_label="idx") - for merge_tool in MERGE_TOOL: - result_df[merge_tool.name + "_raw"] = result_df[merge_tool.name] for idx, row in result_df.iterrows(): for merge_tool in MERGE_TOOL: if "plus" in merge_tool.name: @@ -507,8 +507,7 @@ def main(): if result1.name == MERGE_STATE.Merge_failed.name \ and result2.name == TEST_STATE.Tests_failed.name: result_df.loc[idx, merge_tool.name] = TEST_STATE.Tests_failed.name - - result_df.to_csv(args.output_dir / "result.csv", index_label="idx") + result_df.to_csv(args.output_dir / "result_adjusted.csv", index_label="idx") main_df = result_df[result_df["branch_name"].isin(main_branch_names)] feature = result_df[~result_df["branch_name"].isin(main_branch_names)] diff --git a/src/python/replay_merge.py b/src/python/replay_merge.py index db984637e1..79d4908b33 100755 --- a/src/python/replay_merge.py +++ b/src/python/replay_merge.py @@ -405,7 +405,7 @@ def merge_replay( "--merges_csv", help="CSV file with merges that have been tested", type=str, - default="results/combined/result.csv", + default="results/combined/result_adjusted.csv", ) parser.add_argument( "--idx", diff --git a/src/python/utils/build_inconsistent_merges.py b/src/python/utils/build_inconsistent_merges.py index e752d2f2c7..d0b4d70694 100644 --- a/src/python/utils/build_inconsistent_merges.py +++ b/src/python/utils/build_inconsistent_merges.py @@ -63,7 +63,7 @@ def main(): "--result_csv", type=str, help="Path to the result CSV file", - default="results/combined/result.csv", + default="results/combined/result_adjusted.csv", ) parser.add_argument( "--output_csv", diff --git a/src/python/utils/select_from_results.py b/src/python/utils/select_from_results.py index de245157d6..92205883f7 100755 --- a/src/python/utils/select_from_results.py +++ b/src/python/utils/select_from_results.py @@ -52,7 +52,7 @@ def main(): parser.add_argument( "--input", action="store", - default="results/combined/result.csv", + default="results/combined/result_adjusted.csv", ) parser.add_argument("columns", nargs=argparse.REMAINDER) args = parser.parse_args()