diff --git a/src/python/cache_utils.py b/src/python/cache_utils.py
index 2ef4864ca6..4b8cea5855 100755
--- a/src/python/cache_utils.py
+++ b/src/python/cache_utils.py
@@ -35,7 +35,6 @@ def set_in_cache(
     cache_value: Union[str, dict, None],
     repo_slug: str,
     cache_directory: Path,
-    overwrite: bool = True,
     acquire_lock: bool = True,
 ) -> None:
     """Puts an entry in the cache, then writes the cache to disk.
@@ -45,16 +44,12 @@ def set_in_cache(
         cache_value (dict): The value to write.
         repo_slug (str): The slug of the repository, which is "owner/reponame".
         cache_directory (Path): The path to the cache directory.
-        overwrite (bool, optional) = True: Whether to overwrite an existing cache entry.
-             If False, attempting to overwrite an existing cache entry throws an exception.
     """
     if acquire_lock:
         lock = get_cache_lock(repo_slug, cache_directory)
         lock.acquire()
     cache_path = get_cache_path(repo_slug, cache_directory)
     cache = load_cache(repo_slug, cache_directory)
-    if cache_key in cache and not overwrite:
-        raise ValueError("Cache key already exists")
     cache[cache_key] = cache_value
     output = json.dumps(cache, indent=4)
     with open(cache_path, "w") as f:
diff --git a/src/python/merge_differ.py b/src/python/merge_differ.py
index 574eddef00..cd8e3a60fa 100755
--- a/src/python/merge_differ.py
+++ b/src/python/merge_differ.py
@@ -118,8 +118,6 @@ def merge_differ(args: Tuple[pd.Series, Path]) -> None:
             command.append(str(repo2.repo_path))
             with open(diff_file, "w") as f:
                 subprocess.run(command, stdout=f, stderr=f)
-            del repo2
-        del repo1
 
 
 def diff_file_name(sha1: str, sha2: str) -> Path:
diff --git a/src/python/merge_tester.py b/src/python/merge_tester.py
index d0c51a8701..cf2a396148 100755
--- a/src/python/merge_tester.py
+++ b/src/python/merge_tester.py
@@ -54,10 +54,10 @@ def merge_tester(args: Tuple[str, pd.Series, Path]) -> pd.Series:
     print("merge_tester: Started ", repo_slug, merge_data["left"], merge_data["right"])
 
     merge_data["parents pass"] = False
-    # TODO: It's not clear to me whether "left" and "right" are keys for branches, or shas, or something else.  How about improving the key name?  (We can change the find-merge-conflicts program if it is the source of the problem.)
     for branch in ["left", "right"]:
+        commit_sha = merge_data[branch]
         repo = Repository(repo_slug, cache_directory=cache_directory)
-        repo.checkout(merge_data[branch])
+        repo.checkout(commit_sha)
         tree_fingerprint = repo.compute_tree_fingerprint()
         if tree_fingerprint != merge_data[f"{branch}_tree_fingerprint"]:
             raise Exception(
@@ -112,6 +112,54 @@ def merge_tester(args: Tuple[str, pd.Series, Path]) -> pd.Series:
     return merge_data
 
 
+def build_arguments(
+    args: argparse.Namespace,
+    repo_slug: str,
+) -> list:
+    """Builds the arguments for the merge_tester function.
+    Args:
+        args (argparse.Namespace): The arguments of the script.
+        repo_slug (str): The slug of the repository.
+    Returns:
+        list: The arguments for the merge_tester function.
+    """
+    merge_list_file = Path(
+        os.path.join(args.merges_path, slug_repo_name(repo_slug) + ".csv")
+    )
+    output_file = Path(
+        os.path.join(args.output_dir, slug_repo_name(repo_slug) + ".csv")
+    )
+    if not merge_list_file.exists():
+        print(
+            "merge_tester:",
+            repo_slug,
+            "does not have a list of merges. Missing file: ",
+            merge_list_file,
+        )
+        return []
+
+    if output_file.exists():
+        print(
+            "merge_tester: Skipping",
+            repo_slug,
+            "because it is already computed.",
+        )
+        return []
+    try:
+        merges = pd.read_csv(merge_list_file, header=0, index_col="idx")
+    except pd.errors.EmptyDataError:
+        print(
+            "merge_tester: Skipping",
+            repo_slug,
+            "because it does not contain any merges.",
+        )
+        return []
+    return [
+        (repo_slug, merge_data, Path(args.cache_dir))
+        for _, merge_data in merges.iterrows()
+    ]
+
+
 def main():  # pylint: disable=too-many-locals,too-many-statements
     """Main function"""
     print("merge_tester: Start")
@@ -126,46 +174,11 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
     repos = pd.read_csv(args.repos_head_passes_csv, index_col="idx")
 
-    # TODO: I suggest abstracting this loop into a separate function (through printing "number of merges to test").
     print("merge_tester: Started collecting merges to test")
     merge_tester_arguments = []
     for _, repository_data in tqdm(repos.iterrows(), total=len(repos)):
         repo_slug = repository_data["repository"]
-        merge_list_file = Path(
-            os.path.join(args.merges_path, slug_repo_name(repo_slug) + ".csv")
-        )
-        output_file = Path(
-            os.path.join(args.output_dir, slug_repo_name(repo_slug) + ".csv")
-        )
-        if not merge_list_file.exists():
-            print(
-                "merge_tester:",
-                repo_slug,
-                "does not have a list of merges. Missing file: ",
-                merge_list_file,
-            )
-            continue
-
-        if output_file.exists():
-            print(
-                "merge_tester: Skipping",
-                repo_slug,
-                "because it is already computed.",
-            )
-            continue
-        try:
-            merges = pd.read_csv(merge_list_file, header=0, index_col="idx")
-        except pd.errors.EmptyDataError:
-            print(
-                "merge_tester: Skipping",
-                repo_slug,
-                "because it does not contain any merges.",
-            )
-            continue
-        merge_tester_arguments += [
-            (repo_slug, merge_data, Path(args.cache_dir))
-            for _, merge_data in merges.iterrows()
-        ]
+        merge_tester_arguments += build_arguments(args, repo_slug)
 
     # Shuffle input to reduce cache contention
     random.seed(42)
diff --git a/src/python/merge_tools_comparator.py b/src/python/merge_tools_comparator.py
index 4b06bb7295..d149730d9d 100755
--- a/src/python/merge_tools_comparator.py
+++ b/src/python/merge_tools_comparator.py
@@ -69,8 +69,18 @@ def merger(  # pylint: disable=too-many-locals
             merge_data["right"],
             merge_tool.name,
         )
-        cache_data[merge_tool.name] = {"results": [], "log_files": [], "run_time": []}
-        # TODO: I suggest abstracting out the body of this loop as a function.
+        cache_data[merge_tool.name] = {"results": [], "run_time": []}
+        log_file: Path = (
+            cache_directory
+            / "merge_logs"
+            / slug_repo_name(repo_slug)
+            / merge_data["left"]
+            / merge_data["right"]
+            / (merge_tool.name + f".log")
+        )
+        log_file.parent.mkdir(parents=True, exist_ok=True)
+        log_file.unlink(missing_ok=True)
+
         for i in range(N_REPETITIONS):
             repo = Repository(repo_slug, cache_directory=cache_directory)
             (
@@ -86,48 +96,25 @@ def merger(  # pylint: disable=too-many-locals
                 right_commit=merge_data["right"],
                 timeout=TIMEOUT_MERGING,
             )
-            log_file: Path = (
-                cache_directory
-                / "merge_logs"
-                / slug_repo_name(repo_slug)
-                / merge_data["left"]
-                / merge_data["right"]
-                / (merge_tool.name + f".log")
-            )
-            log_file.parent.mkdir(parents=True, exist_ok=True)
-            if log_file.exists():
-                log_file.unlink()
-            with open(log_file, "w") as f:
-                f.write(explanation)
+            if i == 0:  # Only write the log file once.
+                with open(log_file, "w") as f:
+                    f.write(explanation)
+
             cache_data[merge_tool.name]["results"].append(merge_status.name)
             cache_data[merge_tool.name]["log_file"] = str(log_file)
             cache_data[merge_tool.name]["run_time"].append(run_time)
             if "merge_fingerprint" not in cache_data[merge_tool.name]:
                 cache_data[merge_tool.name]["merge_fingerprint"] = merge_fingerprint
-            else:
-                if (
-                    cache_data[merge_tool.name]["merge_fingerprint"]
-                    != merge_fingerprint
-                ):
-                    raise Exception(
-                        "merge_tools_comparator: Merge fingerprint mismatch",
-                        i,
-                        repo_slug,
-                        merge_data["left"],
-                        merge_data["right"],
-                        merge_tool.name,
-                        merge_status,
-                        cache_data[merge_tool.name]["merge_fingerprint"],
-                        merge_fingerprint,
-                        cache_data,
-                    )
-
-            if "left_tree_fingerprint" not in cache_data:
                 cache_data["left_tree_fingerprint"] = left_fingerprint
                 cache_data["right_tree_fingerprint"] = right_fingerprint
             else:
+                assert (
+                    cache_data[merge_tool.name]["merge_fingerprint"]
+                    == merge_fingerprint
+                )
                 assert cache_data["left_tree_fingerprint"] == left_fingerprint
                 assert cache_data["right_tree_fingerprint"] == right_fingerprint
+
     set_in_cache(cache_key, cache_data, repo_slug, merge_cache_directory)
     return cache_data
 
@@ -156,6 +143,64 @@ def check_if_two_merges_differ(cache_data: dict) -> bool:
     return False
 
 
+def build_merge_arguments(args: argparse.Namespace, repo_slug: str):
+    """
+    Creates the arguments for the merger function.
+    Args:
+        args (argparse.Namespace): The arguments to the script.
+        repo_slug (str): The repository slug.
+    Returns:
+        list: A list of arguments for the merger function.
+    """
+    merge_list_file = Path(
+        os.path.join(args.merges_path, slug_repo_name(repo_slug) + ".csv")
+    )
+    output_file = Path(
+        os.path.join(args.output_dir, slug_repo_name(repo_slug) + ".csv")
+    )
+    if not merge_list_file.exists():
+        raise Exception(
+            "merge_tools_comparator:",
+            repo_slug,
+            "does not have a list of merges. Missing file: ",
+            merge_list_file,
+        )
+
+    if output_file.exists():
+        print(
+            "merge_tools_comparator: Skipping",
+            repo_slug,
+            "because it is already computed.",
+        )
+        return []
+
+    merges = pd.read_csv(
+        merge_list_file,
+        names=["idx", "branch_name", "merge", "left", "right", "notes"],
+        dtype={
+            "idx": int,
+            "branch_name": str,
+            "merge": str,
+            "left": str,
+            "right": str,
+            "notes": str,
+        },
+        header=0,
+        index_col="idx",
+    )
+    merges["notes"].replace(np.nan, "", inplace=True)
+
+    if args.only_trivial_merges:
+        merges = merges[merges["notes"].str.contains("a parent is the base")]
+    elif not args.include_trivial_merges:
+        merges = merges[~merges["notes"].str.contains("a parent is the base")]
+    arguments = [
+        (repo_slug, merge_data, Path(args.cache_dir))
+        for _, merge_data in merges.iterrows()
+    ]
+    return arguments
+
+
 if __name__ == "__main__":
     print("merge_tools_comparator: Start")
     parser = argparse.ArgumentParser()
@@ -174,56 +219,8 @@ def check_if_two_merges_differ(cache_data: dict) -> bool:
     print("merge_tools_comparator: Constructing Inputs")
     merger_arguments = []
     for _, repository_data in tqdm(repos.iterrows(), total=len(repos)):
-        # TODO: I suggest abstracting out the body of this loop as a function.
-        merges_repo = []
         repo_slug = repository_data["repository"]
-        merge_list_file = Path(
-            os.path.join(args.merges_path, slug_repo_name(repo_slug) + ".csv")
-        )
-        output_file = Path(
-            os.path.join(args.output_dir, slug_repo_name(repo_slug) + ".csv")
-        )
-        if not merge_list_file.exists():
-            raise Exception(
-                "merge_tools_comparator:",
-                repo_slug,
-                "does not have a list of merges. Missing file: ",
-                merge_list_file,
-            )
-
-        if output_file.exists():
-            print(
-                "merge_tools_comparator: Skipping",
-                repo_slug,
-                "because it is already computed.",
-            )
-            continue
-
-        merges = pd.read_csv(
-            merge_list_file,
-            names=["idx", "branch_name", "merge", "left", "right", "notes"],
-            dtype={
-                "idx": int,
-                "branch_name": str,
-                "merge": str,
-                "left": str,
-                "right": str,
-                "notes": str,
-            },
-            header=0,
-            index_col="idx",
-        )
-        merges["notes"].replace(np.nan, "", inplace=True)
-
-        if args.only_trivial_merges:
-            merges = merges[merges["notes"].str.contains("a parent is the base")]
-        elif not args.include_trivial_merges:
-            merges = merges[~merges["notes"].str.contains("a parent is the base")]
-
-        merger_arguments += [
-            (repo_slug, merge_data, Path(args.cache_dir))
-            for _, merge_data in merges.iterrows()
-        ]
+        merger_arguments += build_merge_arguments(args, repo_slug)
 
     # Shuffle input to reduce cache contention
     random.seed(42)
diff --git a/src/scripts/merge_tools/resolve-import-conflicts-in-file.py b/src/scripts/merge_tools/resolve-import-conflicts-in-file.py
index 864a4a8bbc..36f2750419 100755
--- a/src/scripts/merge_tools/resolve-import-conflicts-in-file.py
+++ b/src/scripts/merge_tools/resolve-import-conflicts-in-file.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 # TODO: Benedikt, could you review and improve this file?  I'm sure it is not perfect Python style.
 
 """Edits a file in place to remove conflict markers related to Java imports.
@@ -12,6 +12,7 @@
 import shutil
 import sys
 import tempfile
+from typing import List, Union, Tuple
 
 if len(sys.argv) != 2:
     print(
@@ -24,12 +25,12 @@
     lines = file.readlines()
 
 
-def all_import_lines(lines):
+def all_import_lines(lines: List[str]) -> bool:
     """Return true if every line is a Java import line."""
     return all(line.startswith("import ") for line in lines)
 
 
-def merge(base, parent1, parent2):
+def merge(base, parent1: List[str], parent2: List[str]) -> Union[List[str], None]:
     """Given text for the base and two parents, return merged text.
 
     This currently does a simplistic merge that retains all lines in either parent.
@@ -40,7 +41,7 @@ def merge(base, parent1, parent2):
         parent2: a list of lines
 
     Returns:
-        a list of lines, or None if it cannot do merging.
+        Union[List[str], None]: If the merge is successful, returns a list of lines.
     """
 
     if (
@@ -53,7 +54,9 @@ def merge(base, parent1, parent2):
     return None
 
 
-def looking_at_conflict(start_index, lines):  # pylint: disable=R0911
+def looking_at_conflict(  # pylint: disable=too-many-return-statements
+    start_index: int, lines: List[str]
+) -> Union[None, Tuple[List[str], List[str], List[str], int]]:
     """Tests whether the text starting at line `start_index` is the beginning of a conflict.
     If not, returns None.
     If so, returns a 4-tuple of (base, parent1, parent2, num_lines_in_conflict)
@@ -82,7 +85,7 @@ def looking_at_conflict(start_index, lines):  # pylint: disable=R0911
         if index == num_lines:
             print(
                 "Starting at line "
-                + start_index
+                + str(start_index)
                 + ", did not find ||||||| or ======= in "
                 + filename
             )
@@ -98,7 +101,7 @@ def looking_at_conflict(start_index, lines):  # pylint: disable=R0911
             if index == num_lines:
                 print(
                     "Starting at line "
-                    + start_index
+                    + str(start_index)
                     + ", did not find ======= in "
                     + filename
                 )
@@ -114,7 +117,7 @@ def looking_at_conflict(start_index, lines):  # pylint: disable=R0911
         if index == num_lines:
             print(
                 "Starting at line "
-                + start_index
+                + str(start_index)
                 + ", did not find >>>>>>> in "
                 + filename
             )
diff --git a/src/scripts/merge_tools/spork.sh b/src/scripts/merge_tools/spork.sh
index 3e38375de0..7748d5f2e2 100755
--- a/src/scripts/merge_tools/spork.sh
+++ b/src/scripts/merge_tools/spork.sh
@@ -13,8 +13,8 @@ if [ "$#" -ne 3 ]; then
   exit 1
 fi
 
-# TODO: This script is an odd place to put this `killall` command.  Should it be elsewhere in the experimental infrastructure?
 # Kill all Java processes that are running for over an hour (to avoid memory leaks).
+# Spork tends to create Java processes that don't terminate even when the parent process is killed.
 killall -9 java --older-than 1h
 
 SCRIPT_PATH="$(dirname "$0")"; SCRIPT_PATH="$(eval "cd \"$SCRIPT_PATH\" && pwd")"
diff --git a/src/scripts/run_repo_tests.sh b/src/scripts/run_repo_tests.sh
index fc032b38b2..1d647d9b27 100755
--- a/src/scripts/run_repo_tests.sh
+++ b/src/scripts/run_repo_tests.sh
@@ -7,9 +7,8 @@
 
 set -o nounset
 
-# TODO: This is too agressive and will interfere with the user's other work.
-# TODO: Instead, create a directory under /tmp (or, better, under /tmp/$USER) for the AST merging experiments, and clean it.
-# Test side effects can be seen in the /tmp directory.
+# Test side effects can be seen in the /tmp directory which are created by 
+# testing infrastructure. We delete them to avoid filling up the disk.
 # We delete all the files older than 2h and owned by the current user.
 find /tmp -maxdepth 1 -user "$(whoami)" -mmin +120 -exec rm -rf {} \;