Mike's code review comments #208

Merged: 8 commits, Sep 27, 2023
run.sh: 2 changes (1 addition, 1 deletion)
@@ -73,7 +73,7 @@ if [ -d "$CACHE_DIR" ]; then
    find "$CACHE_DIR" -name "*.lock" -delete
fi

-python3 src/python/clean_cache_placeholders.py \
+python3 src/python/delete_cache_placeholders.py \
    --cache_dir "$CACHE_DIR"

python3 src/python/write_head_hashes.py \
src/python/cache_utils.py: 206 changes (87 additions, 119 deletions)
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
""" Contains all the functions related to the caches.
There will be 4 caches which are stored on disk after running the script:
""" Contains all the functions related to the caches. The functions to interact with each
of the caches are in this file. Each cache is interacted with through the functions
of this file. The caches are all JSON files and are stored in the cache directory.
There will be 4 caches in total which are stored on disk after running the run.sh script:
1) cache/sha_cache_entry: A cache that maps the commit hash to a sha256 hash of the repository.
2) cache/test_cache: A cache that maps a sha256 to test results.
3) cache/merge_results: A cache that maps a merge to the result
@@ -28,159 +30,61 @@ def slug_repo_name(repo_slug: str) -> str:
    return repo_slug.split("/")[1]


-def get_cache_lock(repo_slug: str, cache_prefix: Path):
-    """Returns a lock for the cache of a repository.
-    Initially the lock is unlocked; the caller must explictly
-    lock and unlock the lock.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        fasteners.InterProcessLock: A lock for the repository.
-    """
-    lock_path = cache_prefix / "locks" / (slug_repo_name(repo_slug) + ".lock")
-    lock_path.parent.mkdir(parents=True, exist_ok=True)
-    lock = fasteners.InterProcessLock(lock_path)
-    return lock
-
-
-def get_cache_path(repo_slug: str, cache_prefix: Path) -> Path:
-    """Returns the path to the cache file.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        Path: The path to the cache file.
-    """
-    cache_file_name = slug_repo_name(repo_slug) + ".json"
-    cache_path = cache_prefix / cache_file_name
-    cache_path.parent.mkdir(parents=True, exist_ok=True)
-    return cache_path
-
-
-def is_in_cache(
-    cache_key: Union[Tuple, str], repo_slug: str, cache_prefix: Path
-) -> bool:
-    """Checks if the key is in the cache.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path prefix to the cache directory.
-    Returns:
-        bool: True if the repository is in the cache, False otherwise.
-    """
-    cache = load_cache(repo_slug, cache_prefix)
-    return cache_key in cache
-
-
-def load_cache(repo_slug: str, cache_prefix: Path) -> dict:
-    """Loads the cache.
-    Args:
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        dict: The cache.
-    """
-    cache_path = get_cache_path(repo_slug, cache_prefix)
-    if not cache_path.exists():
-        return {}
-    with open(cache_path, "r") as f:
-        cache_data = json.load(f)
-    return cache_data
-
-
-def cache_lookup(
-    cache_key: Union[Tuple, str], repo_slug: str, cache_prefix: Path
-) -> dict:
-    """Loads the cache and returns the value for the given key.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-    Returns:
-        dict: The cache.
-    """
-    cache = load_cache(repo_slug, cache_prefix)
-    return cache[cache_key]
-
-
-def write_to_cache(
+def set_in_cache(
    cache_key: Union[Tuple, str],
    cache_value: Union[str, dict, None],
    repo_slug: str,
-    cache_prefix: Path,
-    overwrite: bool = True,
+    cache_directory: Path,
+    acquire_lock: bool = True,
) -> None:
    """Puts an entry in the cache, then writes the cache to disk.
    This function is not thread-safe.
    Args:
        cache_key (Union[Tuple,str]): The key to check.
        cache_value (dict): The value to write.
        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-        overwrite (bool, optional) = True: Whether to overwrite an existing cache entry.
-            If False, attempting to overwrite an existing cache entry throws an exception.
+        cache_directory (Path): The path to the cache directory.
"""
-    cache_path = get_cache_path(repo_slug, cache_prefix)
-    cache = load_cache(repo_slug, cache_prefix)
-    if cache_prefix != Path("cache-small/sha_cache_entry"):
-        print("write_to_cache:", cache_key, cache_value, repo_slug, cache_prefix)
-    if cache_key in cache and not overwrite:
-        raise ValueError("Cache key already exists")
+    if acquire_lock:
+        lock = get_cache_lock(repo_slug, cache_directory)
+        lock.acquire()
+    cache_path = get_cache_path(repo_slug, cache_directory)
+    cache = load_cache(repo_slug, cache_directory)
    cache[cache_key] = cache_value
    output = json.dumps(cache, indent=4)
    with open(cache_path, "w") as f:
        f.write(output)
        f.flush()


-def set_in_cache(
-    cache_key: Union[Tuple, str],
-    cache_value: Union[str, dict, None],
-    repo_slug: str,
-    cache_prefix: Path,
-    overwrite: bool = True,
-) -> None:
-    """Puts an entry in the cache, then writes the cache to disk.
-    This function is thread-safe.
-    Args:
-        cache_key (Union[Tuple,str]): The key to check.
-        cache_value (dict): The value to write.
-        repo_slug (str): The slug of the repository, which is "owner/reponame".
-        cache_prefix (Path): The path to the cache directory.
-        overwrite (bool, optional) = True: Whether to overwrite the cache if it already exists.
-    """
-    lock = get_cache_lock(repo_slug, cache_prefix)
-    lock.acquire()
-    write_to_cache(cache_key, cache_value, repo_slug, cache_prefix, overwrite)
-    lock.release()
+    if acquire_lock:
+        lock.release()  # type: ignore
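For context, a minimal usage sketch of the merged set_in_cache; the key, value, slug, and path below are hypothetical, not part of this diff:

```python
from pathlib import Path

# Hypothetical values: a sha256-style key mapping to a test result.
set_in_cache(
    cache_key="abc123",
    cache_value={"test_result": "passed"},
    repo_slug="octocat/hello-world",
    cache_directory=Path("cache/test_cache"),
)
# acquire_lock defaults to True, so the call takes and releases the
# repository's inter-process lock around the read-modify-write.
```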


def lookup_in_cache(
    cache_key: Union[Tuple, str],
    repo_slug: str,
-    cache_prefix: Path,
+    cache_directory: Path,
    set_run: bool = False,
) -> Union[str, dict, None]:
"""Checks if the cache is available and loads a specific entry.
Args:
cache_key (Union[Tuple,str]): The key to check.
repo_slug (str): The slug of the repository, which is "owner/reponame".
cache_prefix (Path): The path to the cache directory.
cache_directory (Path): The path to the cache directory.
set_run (bool, optional) = False: Wheter to insert an empty cache entry
if it does not exist. This is useful for preventing multiple runs from
attempting to insert the same cache entry.
Returns:
Union[dict,None]: The cache entry if it exists, None otherwise.
"""
-    lock = get_cache_lock(repo_slug, cache_prefix)
+    lock = get_cache_lock(repo_slug, cache_directory)
    lock.acquire()
-    cache_entry = get_cache_path(repo_slug, cache_prefix)
+    cache_entry = get_cache_path(repo_slug, cache_directory)
    cache_entry.parent.mkdir(parents=True, exist_ok=True)
-    if is_in_cache(cache_key, repo_slug, cache_prefix):
+    if is_in_cache(cache_key, repo_slug, cache_directory):
        total_time = 0
        while True:
-            cache_data = cache_lookup(cache_key, repo_slug, cache_prefix)
+            cache = load_cache(repo_slug, cache_directory)
+            cache_data = cache[cache_key]
            if cache_data is not None:
                break
            lock.release()
@@ -192,6 +96,70 @@ def lookup_in_cache
        lock.release()
        return cache_data
    if set_run:
-        write_to_cache(cache_key, None, repo_slug, cache_prefix)
+        set_in_cache(cache_key, None, repo_slug, cache_directory, acquire_lock=False)
    lock.release()
    return None
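A sketch of the claim-then-fill pattern that set_run enables; the key, slug, directory, and result below are hypothetical:

```python
from pathlib import Path

# With set_run=True, the first process to miss writes a None placeholder,
# so concurrent runs do not start computing the same entry.
entry = lookup_in_cache(
    "abc123", "octocat/hello-world", Path("cache/test_cache"), set_run=True
)
if entry is None:
    entry = {"test_result": "passed"}  # stand-in for the real computation
    set_in_cache("abc123", entry, "octocat/hello-world", Path("cache/test_cache"))
```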


+# ====================== Internal functions ======================
+
+
+def get_cache_lock(repo_slug: str, cache_directory: Path):
+    """Returns a lock for the cache of a repository.
+    Initially the lock is unlocked; the caller must explicitly
+    lock and unlock the lock.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        fasteners.InterProcessLock: A lock for the repository.
+    """
+    lock_path = cache_directory / "locks" / (slug_repo_name(repo_slug) + ".lock")
+    lock_path.parent.mkdir(parents=True, exist_ok=True)
+    lock = fasteners.InterProcessLock(lock_path)
+    return lock
+
+
+def get_cache_path(repo_slug: str, cache_directory: Path) -> Path:
+    """Returns the path to the cache file.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        Path: The path to the cache file.
+    """
+    cache_file_name = slug_repo_name(repo_slug) + ".json"
+    cache_path = cache_directory / cache_file_name
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    return cache_path
+
+
+def is_in_cache(
+    cache_key: Union[Tuple, str], repo_slug: str, cache_directory: Path
+) -> bool:
+    """Checks if the key is in the cache.
+    Args:
+        cache_key (Union[Tuple,str]): The key to check.
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        bool: True if the key is in the cache, False otherwise.
+    """
+    cache = load_cache(repo_slug, cache_directory)
+    return cache_key in cache
+
+
+def load_cache(repo_slug: str, cache_directory: Path) -> dict:
+    """Loads the cache associated with the repo_slug found in the cache directory.
+    Args:
+        repo_slug (str): The slug of the repository, which is "owner/reponame".
+        cache_directory (Path): The path to the cache directory.
+    Returns:
+        dict: The cache.
+    """
+    cache_path = get_cache_path(repo_slug, cache_directory)
+    if not cache_path.exists():
+        return {}
+    with open(cache_path, "r") as f:
+        cache_data = json.load(f)
+    return cache_data
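Since get_cache_lock returns an unlocked fasteners.InterProcessLock, the caller owns the acquire/release lifecycle; a minimal sketch with hypothetical arguments:

```python
from pathlib import Path

# The lock is returned unlocked; bracket cache access explicitly.
lock = get_cache_lock("octocat/hello-world", Path("cache/test_cache"))
lock.acquire()
try:
    cache = load_cache("octocat/hello-world", Path("cache/test_cache"))
finally:
    lock.release()
```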
src/python/clean_cache_placeholders.py → src/python/delete_cache_placeholders.py (renamed)
@@ -6,10 +6,10 @@
result. This script deletes all placeholders from the cache.

Usage:
-    python clean_cache_placeholders.py --cache_path <path_to_cache>
+    python delete_cache_placeholders.py --cache_directory <path_to_cache>

Args:
-    cache_path (str): Path to cache directory
+    cache_directory (str): Path to cache directory
"""

from argparse import ArgumentParser
@@ -23,10 +23,10 @@
)
args = parser.parse_args()

-cache_path = Path(args.cache_dir)
+cache_directory = Path(args.cache_dir)

n_deleted = 0
-for file in cache_path.glob("**/*.json"):
+for file in cache_directory.glob("**/*.json"):
    with open(file, "r") as f:
        data = json.load(f)

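The deletion step itself is collapsed in this view; a plausible sketch of it, assuming a placeholder is an entry whose value is None (as written by lookup_in_cache with set_run=True):

```python
# Assumed continuation of the loop above: drop None-valued entries
# and write the cleaned cache back to disk.
placeholder_keys = [key for key, value in data.items() if value is None]
for key in placeholder_keys:
    del data[key]
    n_deleted += 1
with open(file, "w") as f:
    json.dump(data, f, indent=4)
```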
src/python/latex_output.py: 8 changes (4 additions, 4 deletions)
@@ -14,9 +14,9 @@
following input files:
    - full_repos_csv: csv file containing the full list of repositories
    - repos_head_passes_csv: csv file containing the list of repositories whose head passes tests
-    - tested_merges_path: path to the folder containing the merge results
-    - merges_path: path to the folder containing all found merges.
-    - output_dir: path to the folder where the LaTeX files will be saved
+    - tested_merges_path: path to the directory containing the merge results
+    - merges_path: path to the directory containing all found merges.
+    - output_dir: path to the directory where the LaTeX files will be saved
"""


@@ -76,7 +76,7 @@ def latex_def(name, value) -> str:


# Dictionary that lists the different subsets of merge tools for which plots
-# and tables are generated. The key is the folder name which will contain all figures
+# and tables are generated. The key is the directory name which will contain all figures
# that will be used and the value is the list of plots to contain.
PLOTS = {
"all": [merge_tool.name for merge_tool in MERGE_TOOL],