Mike's code review comments (#208)
mernst authored Sep 27, 2023
1 parent f18f2ce commit d5a8de8
Showing 20 changed files with 636 additions and 477 deletions.
2 changes: 1 addition & 1 deletion run.sh
@@ -73,7 +73,7 @@ if [ -d "$CACHE_DIR" ]; then
find "$CACHE_DIR" -name "*.lock" -delete
fi

-python3 src/python/clean_cache_placeholders.py \
+python3 src/python/delete_cache_placeholders.py \
    --cache_dir "$CACHE_DIR"

python3 src/python/write_head_hashes.py \
206 changes: 87 additions & 119 deletions src/python/cache_utils.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
""" Contains all the functions related to the caches.
There will be 4 caches which are stored on disk after running the script:
""" Contains all the functions related to the caches. The functions to interact with each
of the caches are in this file. Each cache is interacted with through the functions
of this file. The caches are all JSON files and are stored in the cache directory.
There will be 4 caches in total which are stored on disk after running the run.sh script:
1) cache/sha_cache_entry: A cache that maps the commit hash to a sha256 hash of the repository.
2) cache/test_cache: A cache that maps a sha256 to test results.
3) cache/merge_results: A cache that maps a merge to the result
@@ -28,159 +30,61 @@ def slug_repo_name(repo_slug: str) -> str:
    return repo_slug.split("/")[1]


-def get_cache_lock(repo_slug: str, cache_prefix: Path):
-    """Returns a lock for the cache of a repository.
-    Initially the lock is unlocked; the caller must explictly
-    lock and unlock the lock.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        fasteners.InterProcessLock: A lock for the repository.
-    """
-    lock_path = cache_prefix / "locks" / (slug_repo_name(repo_slug) + ".lock")
-    lock_path.parent.mkdir(parents=True, exist_ok=True)
-    lock = fasteners.InterProcessLock(lock_path)
-    return lock
-
-
-def get_cache_path(repo_slug: str, cache_prefix: Path) -> Path:
-    """Returns the path to the cache file.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        Path: The path to the cache file.
-    """
-    cache_file_name = slug_repo_name(repo_slug) + ".json"
-    cache_path = cache_prefix / cache_file_name
-    cache_path.parent.mkdir(parents=True, exist_ok=True)
-    return cache_path
-
-
-def is_in_cache(
-    cache_key: Union[Tuple, str], repo_slug: str, cache_prefix: Path
-) -> bool:
-    """Checks if the key is in the cache.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path prefix to the cache directory.
-    Returns:
-        bool: True if the repository is in the cache, False otherwise.
-    """
-    cache = load_cache(repo_slug, cache_prefix)
-    return cache_key in cache
-
-
-def load_cache(repo_slug: str, cache_prefix: Path) -> dict:
-    """Loads the cache.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        dict: The cache.
-    """
-    cache_path = get_cache_path(repo_slug, cache_prefix)
-    if not cache_path.exists():
-        return {}
-    with open(cache_path, "r") as f:
-        cache_data = json.load(f)
-    return cache_data
-
-
-def cache_lookup(
-    cache_key: Union[Tuple, str], repo_slug: str, cache_prefix: Path
-) -> dict:
-    """Loads the cache and returns the value for the given key.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        dict: The cache.
-    """
-    cache = load_cache(repo_slug, cache_prefix)
-    return cache[cache_key]


-def write_to_cache(
+def set_in_cache(
    cache_key: Union[Tuple, str],
    cache_value: Union[str, dict, None],
    repo_slug: str,
-    cache_prefix: Path,
-    overwrite: bool = True,
+    cache_directory: Path,
+    acquire_lock: bool = True,
) -> None:
    """Puts an entry in the cache, then writes the cache to disk.
-    This function is not thread-safe.
    Args:
        cache_key (Union[Tuple,str]): The key to check.
        cache_value (dict): The value to write.
        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-        overwrite (bool, optional) = True: Whether to overwrite an existing cache entry.
-            If False, attempting to overwrite an existing cache entry throws an exception.
+        cache_directory (Path): The path to the cache directory.
    """
-    cache_path = get_cache_path(repo_slug, cache_prefix)
-    cache = load_cache(repo_slug, cache_prefix)
-    if cache_prefix != Path("cache-small/sha_cache_entry"):
-        print("write_to_cache:", cache_key, cache_value, repo_slug, cache_prefix)
-    if cache_key in cache and not overwrite:
-        raise ValueError("Cache key already exists")
+    if acquire_lock:
+        lock = get_cache_lock(repo_slug, cache_directory)
+        lock.acquire()
+    cache_path = get_cache_path(repo_slug, cache_directory)
+    cache = load_cache(repo_slug, cache_directory)
    cache[cache_key] = cache_value
    output = json.dumps(cache, indent=4)
    with open(cache_path, "w") as f:
        f.write(output)
        f.flush()


-def set_in_cache(
-    cache_key: Union[Tuple, str],
-    cache_value: Union[str, dict, None],
-    repo_slug: str,
-    cache_prefix: Path,
-    overwrite: bool = True,
-) -> None:
-    """Puts an entry in the cache, then writes the cache to disk.
-    This function is thread-safe.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        cache_value (dict): The value to write.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-        overwrite (bool, optional) = True: Whether to overwrite the cache if it already exists.
-    """
-    lock = get_cache_lock(repo_slug, cache_prefix)
-    lock.acquire()
-    write_to_cache(cache_key, cache_value, repo_slug, cache_prefix, overwrite)
-    lock.release()
+    if acquire_lock:
+        lock.release() # type: ignore
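As context for readers, here is a minimal usage sketch of the two locking modes, assuming the signatures above; this is not code from the commit, and the slug, keys, and values are hypothetical.

from pathlib import Path

from cache_utils import get_cache_lock, set_in_cache

repo_slug = "owner/reponame"               # hypothetical slug
cache_directory = Path("cache/test_cache")

# Default mode: set_in_cache acquires and releases the repository lock itself.
set_in_cache("some-key", {"result": "ok"}, repo_slug, cache_directory)

# acquire_lock=False is for callers that already hold the lock, as
# lookup_in_cache does when it inserts a placeholder entry.
lock = get_cache_lock(repo_slug, cache_directory)
lock.acquire()
try:
    set_in_cache("other-key", None, repo_slug, cache_directory, acquire_lock=False)
finally:
    lock.release()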


def lookup_in_cache(
    cache_key: Union[Tuple, str],
    repo_slug: str,
-    cache_prefix: Path,
+    cache_directory: Path,
    set_run: bool = False,
) -> Union[str, dict, None]:
    """Checks if the cache is available and loads a specific entry.
    Args:
        cache_key (Union[Tuple,str]): The key to check.
        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
+        cache_directory (Path): The path to the cache directory.
        set_run (bool, optional) = False: Whether to insert an empty cache entry
            if it does not exist. This is useful for preventing multiple runs from
            attempting to insert the same cache entry.
    Returns:
        Union[dict,None]: The cache entry if it exists, None otherwise.
    """
-    lock = get_cache_lock(repo_slug, cache_prefix)
+    lock = get_cache_lock(repo_slug, cache_directory)
    lock.acquire()
-    cache_entry = get_cache_path(repo_slug, cache_prefix)
+    cache_entry = get_cache_path(repo_slug, cache_directory)
    cache_entry.parent.mkdir(parents=True, exist_ok=True)
-    if is_in_cache(cache_key, repo_slug, cache_prefix):
+    if is_in_cache(cache_key, repo_slug, cache_directory):
        total_time = 0
        while True:
-            cache_data = cache_lookup(cache_key, repo_slug, cache_prefix)
+            cache = load_cache(repo_slug, cache_directory)
+            cache_data = cache[cache_key]
            if cache_data is not None:
                break
            lock.release()
@@ -192,6 +96,70 @@ def lookup_in_cache(
        lock.release()
        return cache_data
    if set_run:
-        write_to_cache(cache_key, None, repo_slug, cache_prefix)
+        set_in_cache(cache_key, None, repo_slug, cache_directory, acquire_lock=False)
    lock.release()
    return None
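As context for readers: the set_run flag supports a reserve-compute-fill pattern. A minimal sketch, assuming the API above (not code from the commit; the key and result value are hypothetical):

from pathlib import Path

from cache_utils import lookup_in_cache, set_in_cache

repo_slug = "owner/reponame"        # hypothetical slug
cache_directory = Path("cache/test_cache")
cache_key = "0a1b2c..."             # hypothetical sha256 key

# If the entry is absent, a None placeholder is written (so concurrent runs
# see the work as in progress) and None is returned; otherwise the cached
# value is returned once it is non-None.
result = lookup_in_cache(cache_key, repo_slug, cache_directory, set_run=True)
if result is None:
    result = {"test_result": "passed"}  # stand-in for the real computation
    set_in_cache(cache_key, result, repo_slug, cache_directory)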


+# ====================== Internal functions ======================


+def get_cache_lock(repo_slug: str, cache_directory: Path):
+    """Returns a lock for the cache of a repository.
+    Initially the lock is unlocked; the caller must explicitly
+    lock and unlock the lock.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        fasteners.InterProcessLock: A lock for the repository.
+    """
+    lock_path = cache_directory / "locks" / (slug_repo_name(repo_slug) + ".lock")
+    lock_path.parent.mkdir(parents=True, exist_ok=True)
+    lock = fasteners.InterProcessLock(lock_path)
+    return lock


+def get_cache_path(repo_slug: str, cache_directory: Path) -> Path:
+    """Returns the path to the cache file.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        Path: The path to the cache file.
+    """
+    cache_file_name = slug_repo_name(repo_slug) + ".json"
+    cache_path = cache_directory / cache_file_name
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    return cache_path


+def is_in_cache(
+    cache_key: Union[Tuple, str], repo_slug: str, cache_directory: Path
+) -> bool:
+    """Checks if the key is in the cache.
+    Args:
+        cache_key (Union[Tuple,str]): The key to check.
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        bool: True if the repository is in the cache, False otherwise.
+    """
+    cache = load_cache(repo_slug, cache_directory)
+    return cache_key in cache


+def load_cache(repo_slug: str, cache_directory: Path) -> dict:
+    """Loads the cache associated with the repo_slug found in the cache directory.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        dict: The cache.
+    """
+    cache_path = get_cache_path(repo_slug, cache_directory)
+    if not cache_path.exists():
+        return {}
+    with open(cache_path, "r") as f:
+        cache_data = json.load(f)
+    return cache_data
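Taken together, these helpers imply one JSON file and one lock file per repository under each cache directory. An illustrative layout and file content follow (not from the commit; the repository name, keys, and values are hypothetical):

cache/
    test_cache/
        locks/
            reponame.lock    # fasteners.InterProcessLock file
        reponame.json        # the per-repository cache
    sha_cache_entry/
        locks/
            reponame.lock
        reponame.json

A reponame.json file is a flat JSON object mapping cache keys to values; a null value is the placeholder that delete_cache_placeholders.py later removes:

{
    "0a1b2c...": {"test_result": "passed"},
    "3d4e5f...": null
}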
src/python/{clean_cache_placeholders.py → delete_cache_placeholders.py}
@@ -6,10 +6,10 @@
result. This script deletes all placeholders from the cache.
Usage:
-    python clean_cache_placeholders.py --cache_path <path_to_cache>
+    python delete_cache_placeholders.py --cache_directory <path_to_cache>
Args:
-    cache_path (str): Path to cache directory
+    cache_directory (str): Path to cache directory
"""

from argparse import ArgumentParser
@@ -23,10 +23,10 @@
)
args = parser.parse_args()

-cache_path = Path(args.cache_dir)
+cache_directory = Path(args.cache_dir)

n_deleted = 0
-for file in cache_path.glob("**/*.json"):
+for file in cache_directory.glob("**/*.json"):
    with open(file, "r") as f:
        data = json.load(f)

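The rest of this script is collapsed in the diff above. Given the placeholder convention (entries whose value is JSON null), a plausible completion of the loop might look like the following; this is a sketch, not the commit's actual code, and the default path is an assumption:

import json
from argparse import ArgumentParser
from pathlib import Path

parser = ArgumentParser()
parser.add_argument("--cache_dir", type=str, default="cache/")  # default is assumed
args = parser.parse_args()

cache_directory = Path(args.cache_dir)

n_deleted = 0
for file in cache_directory.glob("**/*.json"):
    with open(file, "r") as f:
        data = json.load(f)
    # Placeholder entries have a null (None) value: a run reserved the key
    # but never wrote a real result.
    cleaned = {key: value for key, value in data.items() if value is not None}
    if len(cleaned) != len(data):
        n_deleted += len(data) - len(cleaned)
        with open(file, "w") as f:
            json.dump(cleaned, f, indent=4)

print(f"Deleted {n_deleted} placeholder entries")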
8 changes: 4 additions & 4 deletions src/python/latex_output.py
@@ -14,9 +14,9 @@
following input files:
    - full_repos_csv: csv file containing the full list of repositories
    - repos_head_passes_csv: csv file containing the list of repositories whose head passes tests
-    - tested_merges_path: path to the folder containing the merge results
-    - merges_path: path to the folder containing all found merges.
-    - output_dir: path to the folder where the LaTeX files will be saved
+    - tested_merges_path: path to the directory containing the merge results
+    - merges_path: path to the directory containing all found merges.
+    - output_dir: path to the directory where the LaTeX files will be saved
"""


@@ -76,7 +76,7 @@ def latex_def(name, value) -> str:


# Dictionary that lists the different subsets of merge tools for which plots
-# and tables are generated. The key is the folder name which will contain all figures
+# and tables are generated. The key is the directory name which will contain all figures
# that will be used and the value is the list of plots to contain.
PLOTS = {
"all": [merge_tool.name for merge_tool in MERGE_TOOL],
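The remaining PLOTS entries are collapsed in the diff above. Per the comment, each key is a directory name and each value is a list of plot names, so an additional subset entry might look like this (the entry name and tool names are hypothetical, mirroring the one visible entry):

PLOTS = {
    "all": [merge_tool.name for merge_tool in MERGE_TOOL],
    "git_only": ["gitmerge_ort", "gitmerge_recursive"],  # hypothetical subset
}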
(The remaining changed files are not shown here.)
