Commit
Added pre-commit hook (#257)
benedikt-schesch authored Nov 26, 2023
1 parent cad8d3a commit 6a06239
Showing 28 changed files with 156 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-style.yml
@@ -22,6 +22,6 @@ jobs:
           activate-environment: AST
           environment-file: environment.yml
       - name: Install shellcheck and checkbashisms
-        run: sudo apt install shellcheck devscripts
+        run: sudo apt update && sudo apt install shellcheck devscripts
       - name: Check style
        run: make style
1 change: 0 additions & 1 deletion .github/workflows/small-test.yml
@@ -60,4 +60,3 @@ jobs:
           make small-test
         env:
           GITHUB_TOKEN: ${{ secrets.TOKEN_GITHUB }}
-
57 changes: 57 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,57 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-added-large-files
      - id: check-ast
      - id: check-byte-order-marker
      - id: check-builtin-literals
      - id: check-case-conflict
      - id: check-docstring-first
      - id: check-executables-have-shebangs
      - id: check-json
      - id: check-shebang-scripts-are-executable
      - id: pretty-format-json
      - id: check-merge-conflict
      - id: check-symlinks
      - id: check-toml
      - id: check-vcs-permalinks
      - id: check-xml
      - id: check-yaml
      - id: debug-statements
      - id: destroyed-symlinks
      - id: detect-aws-credentials
      - id: detect-private-key
      - id: end-of-file-fixer
      - id: file-contents-sorter
      - id: fix-byte-order-marker
      - id: fix-encoding-pragma
      - id: forbid-new-submodules
      - id: forbid-submodules
      - id: mixed-line-ending
      - id: name-tests-test
      - id: no-commit-to-branch
      - id: requirements-txt-fixer
      - id: sort-simple-yaml
      - id: trailing-whitespace
  - repo: https://github.com/pre-commit/pygrep-hooks
    rev: v1.10.0
    hooks:
      - id: python-check-blanket-noqa
      - id: python-check-mock-methods
      - id: python-no-eval
      - id: python-no-log-warn
      - id: python-use-type-annotations
      - id: rst-backticks
      - id: rst-directive-colons
      - id: rst-inline-touching-normal
      - id: text-unicode-replacement-char
  - repo: https://github.com/psf/black
    rev: 23.11.0
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/pylint
    rev: v3.0.1
    hooks:
      - id: pylint
        args: ['--disable=R0801,E0401,W0718,W0719,W1510,C0103,W0621']
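With this config in place (and `pre-commit` added to environment.yml below), the hooks are normally enabled once per clone with `pre-commit install` and run against the whole tree with `pre-commit run --all-files`; the pylint entry disables a handful of checks such as duplicate-code (R0801) and import-error (E0401). As a rough sketch, the full-tree run could also be scripted from Python — nothing below is part of this commit:

```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Hypothetical helper: run every configured pre-commit hook from Python."""
import subprocess
import sys


def run_all_hooks() -> int:
    """Invoke `pre-commit run --all-files`; 0 means every hook passed."""
    result = subprocess.run(["pre-commit", "run", "--all-files"], check=False)
    return result.returncode


if __name__ == "__main__":
    sys.exit(run_all_hooks())
```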
1 change: 1 addition & 0 deletions environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - ncurses
   - openssl
   - pip
+  - pre-commit
   - readline
   - setuptools
   - sqlite
1 change: 0 additions & 1 deletion input_data/README.md
@@ -10,4 +10,3 @@ To refresh the contents of this directory, run script `get_repos.py`.
 * repos_small.csv -> List of only 2 repos.
 
 * repos_small_with_hashes.csv -> repos_small.csv but with the tested commit for repository validation
-
Empty file modified run_xp.sh
100644 → 100755
Empty file.
3 changes: 2 additions & 1 deletion src/python/add_jacoco_gradle.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """ Adds Jacoco plugin configuration to a Gradle build file. """
 
 
@@ -14,7 +15,7 @@ def update_gradle_for_jacoco(gradle_path: str) -> None:
         + " html.enabled true\n }\n}"
     )
 
-    with open(gradle_path, "r+") as f:
+    with open(gradle_path, "r+", encoding="utf-8") as f:
         content = f.read()
 
     # Only add Jacoco plugin if it's not already there
1 change: 1 addition & 0 deletions src/python/add_jacoco_maven.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """ Adds Jacoco plugin configuration to a Maven pom.xml file. """
 
 from typing import Any
7 changes: 4 additions & 3 deletions src/python/cache_cleanup.py
100644 → 100755
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-"""
+# -*- coding: utf-8 -*-
+"""
 Deletes all placeholders from the cache. Placeholders are created when a
 a process starts; it indicates that is has started and is still running.
 If the process fails, the placeholder is not replaced with the actual
@@ -33,7 +34,7 @@
         if file.is_dir():
             continue
         try:
-            with open(file, "r") as f:
+            with open(file, "r", encoding="utf-8") as f:
                 data = json.load(f)
         except json.JSONDecodeError:
             print(f"Could not read {file}")
@@ -53,6 +54,6 @@
             data.pop(key)
             n_deleted += 1
 
-        with open(file, "w") as f:
+        with open(file, "w", encoding="utf-8") as f:
             json.dump(data, f, indent=4)
     print(f"Deleted {n_deleted} placeholders")
3 changes: 2 additions & 1 deletion src/python/cache_merger.py
@@ -1,4 +1,5 @@
-""" Merge multiple caches into one
+# -*- coding: utf-8 -*-
+""" Merge multiple caches into one
 Usage: python cache_merger.py cache1 cache2 cache3 --output_cache cache_merged
 """
 
15 changes: 8 additions & 7 deletions src/python/cache_utils.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 """ Contains all the functions related to the caches. The functions to interact with each
-of the caches are in this file. Each cache is interacted with through the functions 
+of the caches are in this file. Each cache is interacted with through the functions
 of this file. The caches are all JSON files and are stored in the cache directory.
 There will be 4 caches in total which are stored on disk after running the run.sh script:
 1) cache/sha_cache_entry: A cache that maps the commit hash to a sha256 hash of the repository.
 2) cache/test_cache: A cache that maps a sha256 to test results.
-3) cache/merge_results: A cache that maps a merge to the result 
+3) cache/merge_results: A cache that maps a merge to the result
 of the merge (sha256, run time, and MERGE_STATE).
 4) cache/merge_diffs: A cache that stores the diff between merge tools.
 """
@@ -45,18 +46,18 @@ def set_in_cache(
         repo_slug (str): The slug of the repository, which is "owner/reponame".
         cache_directory (Path): The path to the cache directory.
     """
-    lock = get_cache_lock(repo_slug, cache_directory)
     if acquire_lock:
+        lock = get_cache_lock(repo_slug, cache_directory)
         lock.acquire()
     cache_path = get_cache_path(repo_slug, cache_directory)
     cache = load_cache(repo_slug, cache_directory)
     cache[cache_key] = cache_value
     output = json.dumps(cache, indent=4)
-    with open(cache_path, "w") as f:
+    with open(cache_path, "w", encoding="utf-8") as f:
         f.write(output)
         f.flush()
-    if acquire_lock:
-        lock.release()  # type: ignore
+    if acquire_lock and lock is not None:
+        lock.release()
 
 
 def lookup_in_cache(
@@ -160,6 +161,6 @@ def load_cache(repo_slug: str, cache_directory: Path) -> dict:
     cache_path = get_cache_path(repo_slug, cache_directory)
     if not cache_path.exists():
         return {}
-    with open(cache_path, "r") as f:
+    with open(cache_path, "r", encoding="utf-8") as f:
         cache_data = json.load(f)
     return cache_data
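The set_in_cache change above fixes a latent bug: the lock was created even when acquire_lock was False, and the release path did not guard against an unbound lock. One alternative shape for the same logic (a sketch, not the repository's code) keeps a single acquire/release path via contextlib.nullcontext, assuming get_cache_lock returns a context-manager style lock (as filelock/fasteners locks are):

```python
# -*- coding: utf-8 -*-
"""Sketch: conditional locking with a single code path."""
import json
from contextlib import nullcontext
from pathlib import Path

from cache_utils import get_cache_lock, get_cache_path, load_cache


def set_in_cache_sketch(
    cache_key: str,
    cache_value: object,
    repo_slug: str,
    cache_directory: Path,
    acquire_lock: bool = True,
) -> None:
    """Like set_in_cache, but the lock is a no-op context when not needed."""
    lock = get_cache_lock(repo_slug, cache_directory) if acquire_lock else nullcontext()
    with lock:
        cache = load_cache(repo_slug, cache_directory)
        cache[cache_key] = cache_value
        cache_path = get_cache_path(repo_slug, cache_directory)
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=4)
```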
8 changes: 4 additions & 4 deletions src/python/delete_cache_placeholders.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-"""
+# -*- coding: utf-8 -*-
+"""
 Deletes all placeholders from the cache. Placeholders are created when a
 a process starts; it indicates that is has started and is still running.
 If the process fails, the placeholder is not replaced with the actual
@@ -15,7 +16,6 @@
 from argparse import ArgumentParser
 from pathlib import Path
 import json
-import shutil
 
 if __name__ == "__main__":
     parser = ArgumentParser()
@@ -31,7 +31,7 @@
         if file.is_dir():
             continue
         try:
-            with open(file, "r") as f:
+            with open(file, "r", encoding="utf-8") as f:
                 data = json.load(f)
         except json.JSONDecodeError:
             print(f"Could not read {file}")
@@ -43,6 +43,6 @@
             data.pop(key)
             n_deleted += 1
 
-        with open(file, "w") as f:
+        with open(file, "w", encoding="utf-8") as f:
             json.dump(data, f, indent=4)
     print(f"Deleted {n_deleted} placeholders")
3 changes: 1 addition & 2 deletions src/python/get_repos.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 """Download repo list."""
 
 # usage: python3 get_repos.py
@@ -10,8 +11,6 @@
 import gzip
 import urllib.request
 from io import BytesIO
-import os
-import sys
 
 import pandas as pd
 import numpy as np
30 changes: 19 additions & 11 deletions src/python/latex_output.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 """Output LaTeX tables and plots.
 usage: python3 latex_output.py
@@ -33,7 +34,7 @@
 from tqdm import tqdm
 import seaborn as sns
 
-from merge_tester import TIMEOUT_TESTING_MERGE, TIMEOUT_TESTING_PARENT
+from variables import TIMEOUT_TESTING_PARENT, TIMEOUT_TESTING_MERGE
 from repo import MERGE_STATE, TEST_STATE, MERGE_TOOL
 from cache_utils import slug_repo_name
 
@@ -141,7 +142,6 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     result_df_list = []
     repos = pd.read_csv(args.repos_head_passes_csv, index_col="idx")
     for _, repository_data in tqdm(repos.iterrows(), total=len(repos)):
-        merges_repo = []
         repo_slug = repository_data["repository"]
         merge_list_file = Path(
             os.path.join(args.tested_merges_path, slug_repo_name(repo_slug) + ".csv")
@@ -207,7 +207,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
             ):
                 result[idx][idx2 + idx + 1] += 1
                 result[idx2 + idx + 1][idx] += 1
-    fig, ax = plt.subplots()
+    _, ax = plt.subplots()
     result = np.tril(result)
     latex_merge_tool = [
         "\\mbox{" + merge_tool_latex_name(i) + "}" for i in merge_tools
@@ -236,12 +236,16 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     plt.savefig(os.path.join(plots_output_path, "heatmap.pdf"))
     plt.close()
     # Correct the path to the stored image in the pgf file.
-    with open(os.path.join(plots_output_path, "heatmap.pgf"), "rt") as f:
+    with open(
+        os.path.join(plots_output_path, "heatmap.pgf"), "rt", encoding="utf-8"
+    ) as f:
         file_content = f.read()
     file_content = file_content.replace(
         "heatmap-img0.png", f"plots/{plot_category}/heatmap-img0.png"
     )
-    with open(os.path.join(plots_output_path, "heatmap.pgf"), "wt") as f:
+    with open(
+        os.path.join(plots_output_path, "heatmap.pgf"), "wt", encoding="utf-8"
+    ) as f:
         f.write(file_content)
 
     incorrect = []
@@ -264,7 +268,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
 
     # Cost plot
     MAX_COST = 120
-    fig, ax = plt.subplots()
+    _, ax = plt.subplots()
     for idx, merge_tool in enumerate(merge_tools):
         results = []
         for cost_factor in np.linspace(1, MAX_COST, 1000):
@@ -282,7 +286,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
             alpha=0.8,
         )
     plt.xlabel("Incorrect merges cost factor $k$")
-    plt.ylabel("\mbox{Merge\_Score}")
+    plt.ylabel("\\mbox{Merge_Score}")
     plt.xlim(0, 20)
     plt.ylim(0.75, 0.95)
     plt.legend()
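The ylabel change above removes an invalid escape sequence: \m is not a recognized Python escape, and such strings only work because unknown escapes are currently passed through with a deprecation warning. Equivalent warning-free spellings (illustration only):

```python
# Two ways to pass the same LaTeX macro to matplotlib; the raw string
# is usually the most readable.
escaped = "\\mbox{Merge_Score}"
raw = r"\mbox{Merge_Score}"
assert escaped == raw
```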
@@ -291,7 +295,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     plt.savefig(os.path.join(plots_output_path, "cost_without_manual.pdf"))
 
     # Cost plot with manual merges
-    line = ax.plot(
+    ax.plot(
         np.linspace(1, MAX_COST, 1000),
         np.zeros(1000),
         label="Manual Merging",
@@ -335,7 +339,9 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
         table += f" & {incorrect[merge_tool_idx]:5} & {round(incorrect_percentage):3}\\% \\\\\n"
     table += "\\end{tabular}\n"
 
-    with open(os.path.join(tables_output_path, "table_summary.tex"), "w") as file:
+    with open(
+        os.path.join(tables_output_path, "table_summary.tex"), "w", encoding="utf-8"
+    ) as file:
         file.write(table)
 
     # Printed Table
@@ -431,7 +437,9 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     table2 += "\\end{tabular}\n"
 
     with open(
-        os.path.join(tables_output_path, "table_feature_main_summary.tex"), "w"
+        os.path.join(tables_output_path, "table_feature_main_summary.tex"),
+        "w",
+        encoding="utf-8",
     ) as file:
         file.write(table2)
 
@@ -530,7 +538,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     output += latex_def("parentTestTimeout", str(TIMEOUT_TESTING_PARENT // 60))
     output += latex_def("mergeTestTimeout", str(TIMEOUT_TESTING_MERGE // 60))
 
-    with open(os.path.join(args.output_dir, "defs.tex"), "w") as file:
+    with open(os.path.join(args.output_dir, "defs.tex"), "w", encoding="utf-8") as file:
         file.write(output)
 
 
3 changes: 2 additions & 1 deletion src/python/merge_analyzer.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 """ Analyze the merges i.e. check if the parents pass tests and statistics between merges.
 usage: python3 merge_analyzer.py --repos_head_passes_csv <path_to_repos_head_passes.csv>
                                  --merges_path <path_to_merges>
@@ -24,7 +25,7 @@
 from tqdm import tqdm
 from cache_utils import set_in_cache, lookup_in_cache, slug_repo_name
 from write_head_hashes import num_processes
-from variables import TIMEOUT_MERGING, TIMEOUT_TESTING_PARENT, N_TESTS
+from variables import TIMEOUT_TESTING_PARENT, N_TESTS
 import matplotlib.pyplot as plt
 
 if os.getenv("TERM", "dumb") == "dumb":