From e039b27aefdb7bd936c463d2d7e97366f3a12f46 Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Sun, 14 Jul 2024 15:54:14 -0500
Subject: [PATCH 1/7] introduced a way to use the local score object without
 caching

---
 causallearn/score/LocalScoreFunctionClass.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/causallearn/score/LocalScoreFunctionClass.py b/causallearn/score/LocalScoreFunctionClass.py
index 41ffb49f..8171d515 100644
--- a/causallearn/score/LocalScoreFunctionClass.py
+++ b/causallearn/score/LocalScoreFunctionClass.py
@@ -46,3 +46,9 @@ def score(self, i: int, PAi: List[int]) -> float:
                 self.score_cache[i][hash_key] = self.local_score_fun(self.data, i, PAi, self.parameters)
 
         return self.score_cache[i][hash_key]
+
+    def score_nocache(self, i: int, PAi: List[int]) -> float:
+        if self.local_score_fun == local_score_BIC_from_cov:
+            return self.local_score_fun((self.cov, self.n), i, PAi, self.parameters)
+        else:
+            return self.local_score_fun(self.data, i, PAi, self.parameters)
\ No newline at end of file

From 54e752b587d3e59fd879322b962da15c0a439890 Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Mon, 15 Jul 2024 15:31:02 -0500
Subject: [PATCH 2/7] removing maxP because it dodnt do anything

---
 causallearn/search/PermutationBased/GRaSP.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index abcbf47c..6fcf2121 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -78,7 +78,6 @@ def grasp(
     X: np.ndarray,
     score_func: str = "local_score_BIC",
     depth: Optional[int] = 3,
-    maxP: Optional[float] = None,
     parameters: Optional[Dict[str, Any]] = None,
     verbose: Optional[bool] = True,
     node_names: Optional[List[str]] = None,
@@ -92,7 +91,6 @@ def grasp(
     score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general',
                     'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')).
     depth : allowed maximum depth for DFS
-    maxP : allowed maximum number of parents when searching the graph
     parameters : when using CV likelihood,
                   parameters['kfold']: k-fold cross validation
                   parameters['lambda']: regularization parameter
@@ -118,8 +116,6 @@ def grasp(
                 "kfold": 10,  # 10 fold cross validation
                 "lambda": 0.01,
             }  # regularization parameter
-        if maxP is None:
-            maxP = X.shape[1] / 2  # maximum number of parents
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_general, parameters=parameters
         )
@@ -128,8 +124,6 @@ def grasp(
         score_func == "local_score_marginal_general"
     ):  # negative marginal likelihood based on regression in RKHS
         parameters = {}
-        if maxP is None:
-            maxP = X.shape[1] / 2  # maximum number of parents
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
@@ -146,8 +140,6 @@ def grasp(
             }  # regularization parameter
             for i in range(X.shape[1]):
                 parameters["dlabel"]["{}".format(i)] = i
-        if maxP is None:
-            maxP = len(parameters["dlabel"]) / 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
@@ -160,8 +152,6 @@ def grasp(
             parameters = {"dlabel": {}}
             for i in range(X.shape[1]):
                 parameters["dlabel"]["{}".format(i)] = i
-        if maxP is None:
-            maxP = len(parameters["dlabel"]) / 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
@@ -169,15 +159,11 @@ def grasp(
     elif score_func == "local_score_BIC":  # Greedy equivalence search with BIC score
         parameters = {}
         parameters["lambda_value"] = 2
-        if maxP is None:
-            maxP = X.shape[1] / 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
 
     elif score_func == "local_score_BDeu":  # Greedy equivalence search with BDeu score
-        if maxP is None:
-            maxP = X.shape[1] / 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
         )

From 76f7956f42f6817f0ad760989e154231650b5e1d Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Tue, 16 Jul 2024 06:50:24 -0500
Subject: [PATCH 3/7] fixed grasp  unittest

---
 causallearn/search/PermutationBased/GRaSP.py | 11 ++++++-
 tests/TestGRaSP.py                           | 31 +++++++++-----------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index 6fcf2121..fcff6c0d 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -10,6 +10,7 @@
 from causallearn.score.LocalScoreFunction import (
     local_score_BDeu,
     local_score_BIC,
+    local_score_BIC_from_cov,
     local_score_cv_general,
     local_score_cv_multi,
     local_score_marginal_general,
@@ -76,7 +77,7 @@ def len(self):
 
 def grasp(
     X: np.ndarray,
-    score_func: str = "local_score_BIC",
+    score_func: str = "local_score_BIC_from_cov",
     depth: Optional[int] = 3,
     parameters: Optional[Dict[str, Any]] = None,
     verbose: Optional[bool] = True,
@@ -157,12 +158,20 @@ def grasp(
         )
 
     elif score_func == "local_score_BIC":  # Greedy equivalence search with BIC score
+        warnings.warn("Please use 'local_score_BIC_from_cov' instead")
         parameters = {}
         parameters["lambda_value"] = 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
 
+    elif score_func == "local_score_BIC_from_cov":  # Greedy equivalence search with BIC score
+        parameters = {}
+        parameters["lambda_value"] = 2
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
+        )
+
     elif score_func == "local_score_BDeu":  # Greedy equivalence search with BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py
index 7482dcee..de44ae1e 100644
--- a/tests/TestGRaSP.py
+++ b/tests/TestGRaSP.py
@@ -66,7 +66,8 @@ def parameterize_dag(g):
 
 class TestGRaSP(unittest.TestCase):
     def test_grasp(self):
-        ps = [30, 60]
+        # ps = [30, 60]
+        ps = [30]
         ds = [0.1, 0.15]
         n = 1000
         reps = 5
@@ -77,19 +78,15 @@ def test_grasp(self):
                 for rep in range(1, reps + 1):
                     g0 = random_dag(p, d)
                     print(
-                        "\nNodes:",
-                        p,
-                        "| Edges:",
-                        np.sum(g0),
-                        "| Avg Degree:",
-                        2 * np.sum(g0) / p,
-                        "| Rep:",
-                        rep,
+                        "\nNodes:", p,
+                        "| Edges:", np.sum(g0),
+                        "| Avg Degree:", 2 * np.sum(g0) / p,
+                        "| Rep:", rep,
                     )
                     cov = parameterize_dag(g0)
                     X = np.random.multivariate_normal(np.zeros(p), cov, n)
 
-                    node_names = [("x%d" % i) for i in range(p)]
+                    node_names = [("X%d" % (i + 1)) for i in range(p)]
                     nodes = []
 
                     for name in node_names:
@@ -105,13 +102,13 @@ def test_grasp(self):
 
                     G = grasp(X)
 
-                    pyd = GraphUtils.to_pydot(G)
-                    tmp_png = pyd.create_png(f="png")
-                    fp = io.BytesIO(tmp_png)
-                    img = mpimg.imread(fp, format='png')
-                    plt.axis('off')
-                    plt.imshow(img)
-                    plt.show()
+                    # pyd = GraphUtils.to_pydot(G)
+                    # tmp_png = pyd.create_png(f="png")
+                    # fp = io.BytesIO(tmp_png)
+                    # img = mpimg.imread(fp, format='png')
+                    # plt.axis('off')
+                    # plt.imshow(img)
+                    # plt.show()
 
                     AdjC = AdjacencyConfusion(G0, G)
                     stats[0].append(AdjC.get_adj_precision())

From cebe49b8c25a8234c877ca06e6d1a8debddfa320 Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Tue, 16 Jul 2024 20:27:18 -0500
Subject: [PATCH 4/7] adding gsts and boss

---
 causallearn/search/PermutationBased/BOSS.py  | 214 +++++++++++++++++++
 causallearn/search/PermutationBased/GRaSP.py |  65 +++---
 causallearn/search/PermutationBased/gst.py   |  77 +++++++
 tests/TestBOSS.py                            | 124 +++++++++++
 tests/TestGRaSP.py                           |  59 ++---
 5 files changed, 476 insertions(+), 63 deletions(-)
 create mode 100644 causallearn/search/PermutationBased/BOSS.py
 create mode 100644 causallearn/search/PermutationBased/gst.py
 create mode 100644 tests/TestBOSS.py

diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py
new file mode 100644
index 00000000..b72d8f44
--- /dev/null
+++ b/causallearn/search/PermutationBased/BOSS.py
@@ -0,0 +1,214 @@
+import random
+import sys
+import time
+import warnings
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+from causallearn.graph.GeneralGraph import GeneralGraph
+from causallearn.graph.GraphNode import GraphNode
+from causallearn.score.LocalScoreFunction import (
+    local_score_BDeu,
+    local_score_BIC,
+    local_score_BIC_from_cov,
+    local_score_cv_general,
+    local_score_cv_multi,
+    local_score_marginal_general,
+    local_score_marginal_multi,
+)
+from causallearn.search.PermutationBased.gst import GST;
+from causallearn.score.LocalScoreFunctionClass import LocalScoreClass
+from causallearn.utils.DAG2CPDAG import dag2cpdag
+
+
+def boss(
+    X: np.ndarray,
+    score_func: str = "local_score_BIC_from_cov",
+    parameters: Optional[Dict[str, Any]] = None,
+    verbose: Optional[bool] = True,
+    node_names: Optional[List[str]] = None,
+) -> GeneralGraph:
+    """
+    Perform a best order score search (BOSS) algorithm
+
+    Parameters
+    ----------
+    X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features.
+    score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general',
+                    'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')).
+    parameters : when using CV likelihood,
+                  parameters['kfold']: k-fold cross validation
+                  parameters['lambda']: regularization parameter
+                  parameters['dlabel']: for variables with multi-dimensions,
+                               indicate which dimensions belong to the i-th variable.
+    verbose : whether to print the time cost and verbose output of the algorithm.
+
+    Returns
+    -------
+    G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j.
+    """
+
+    n, p = X.shape
+    if n < p:
+        warnings.warn("The number of features is much larger than the sample size!")
+
+    X = np.mat(X)
+    if score_func == "local_score_CV_general":  
+        # % k-fold negative cross validated likelihood based on regression in RKHS
+        if parameters is None:
+            parameters = {
+                "kfold": 10,  # 10 fold cross validation
+                "lambda": 0.01,
+            }  # regularization parameter
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_cv_general, parameters=parameters
+        )
+
+    elif score_func == "local_score_marginal_general":  
+        # negative marginal likelihood based on regression in RKHS
+        parameters = {}
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_marginal_general, parameters=parameters
+        )
+
+    elif score_func == "local_score_CV_multi":  
+        # k-fold negative cross validated likelihood based on regression in RKHS
+        # for data with multi-variate dimensions
+        if parameters is None:
+            parameters = {
+                "kfold": 10,
+                "lambda": 0.01,
+                "dlabel": {},
+            }  # regularization parameter
+            for i in range(X.shape[1]):
+                parameters["dlabel"]["{}".format(i)] = i
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_cv_multi, parameters=parameters
+        )
+
+    elif score_func == "local_score_marginal_multi":  
+        # negative marginal likelihood based on regression in RKHS
+        # for data with multi-variate dimensions
+        if parameters is None:
+            parameters = {"dlabel": {}}
+            for i in range(X.shape[1]):
+                parameters["dlabel"]["{}".format(i)] = i
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
+        )
+
+    elif score_func == "local_score_BIC":  
+        # SEM BIC score
+        warnings.warn("Please use 'local_score_BIC_from_cov' instead")
+        parameters = {}
+        parameters["lambda_value"] = 2
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_BIC, parameters=parameters
+        )
+
+    elif score_func == "local_score_BIC_from_cov":  
+        # SEM BIC score
+        parameters = {}
+        parameters["lambda_value"] = 2
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
+        )
+
+    elif score_func == "local_score_BDeu":  
+        # BDeu score
+        localScoreClass = LocalScoreClass(
+            data=X, local_score_fun=local_score_BDeu, parameters=None
+        )
+
+    else:
+        raise Exception("Unknown function!")
+    score = localScoreClass
+
+    gsts = [GST(i, score) for i in range(p)]
+
+    node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names
+    nodes = []
+
+    for name in node_names:
+        node = GraphNode(name)
+        nodes.append(node)
+
+    G = GeneralGraph(nodes)
+
+    runtime = time.perf_counter()
+        
+    order = [v for v in range(p)]
+
+    gsts = [GST(v, score) for v in order]
+    parents = {v: [] for v in order}
+    
+    variables = [v for v in order]
+    while True:
+        improved = False
+        random.shuffle(variables)
+        if verbose:
+            for i, v in enumerate(order):
+                parents[v].clear()
+                gsts[v].trace(order[:i], parents[v])
+            sys.stdout.write("\rBOSS edge count: %i    " % np.sum([len(parents[v]) for v in range(p)]))
+            sys.stdout.flush()
+
+        for v in variables:
+            improved |= better_mutation(v, order, gsts)
+        if not improved: break
+
+    for i, v in enumerate(order):
+        parents[v].clear()
+        gsts[v].trace(order[:i], parents[v])
+
+    runtime = time.perf_counter() - runtime
+    
+    if verbose:
+        sys.stdout.write("\nBOSS completed in: %.2fs \n" % runtime)
+        sys.stdout.flush()
+
+    for y in range(p):
+        for x in parents[y]:
+            G.add_directed_edge(nodes[x], nodes[y])
+
+    G = dag2cpdag(G)
+
+    return G
+
+
+def reversed_enumerate(iter, j):
+    for w in reversed(iter):
+        yield j, w
+        j -= 1
+
+
+def better_mutation(v, order, gsts):
+    i = order.index(v)
+    p = len(order)
+    scores = np.zeros(p + 1)
+
+    prefix = []
+    score = 0
+    for j, w in enumerate(order):
+        scores[j] = gsts[v].trace(prefix) + score
+        if v != w:
+            score += gsts[w].trace(prefix)
+            prefix.append(w)
+
+    scores[p] = gsts[v].trace(prefix) + score
+    best = p
+
+    prefix.append(v)
+    score = 0
+    for j, w in reversed_enumerate(order, p - 1):
+        if v != w:
+            prefix.remove(w)
+            score += gsts[w].trace(prefix)
+        scores[j] += score
+        if scores[j] > scores[best]: best = j
+        
+    if scores[i] + 1e-6 > scores[best]: return False
+    order.remove(v)
+    order.insert(best - int(best > i), v)
+
+    return True
\ No newline at end of file
diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index fcff6c0d..09416b94 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -16,6 +16,7 @@
     local_score_marginal_general,
     local_score_marginal_multi,
 )
+from causallearn.search.PermutationBased.gst import GST;
 from causallearn.score.LocalScoreFunctionClass import LocalScoreClass
 from causallearn.utils.DAG2CPDAG import dag2cpdag
 
@@ -34,6 +35,7 @@ def __init__(self, p, score):
             y = self.order[i]
             self.parents[y] = []
             self.local_scores[y] = -score.score(y, [])
+            # self.local_scores[y] = -score.score_nocache(y, [])
 
     def get(self, i):
         return self.order[i]
@@ -109,9 +111,8 @@ def grasp(
         warnings.warn("The number of features is much larger than the sample size!")
 
     X = np.mat(X)
-    if (
-        score_func == "local_score_CV_general"
-    ):  # % k-fold negative cross validated likelihood based on regression in RKHS
+    if score_func == "local_score_CV_general":  
+        # k-fold negative cross validated likelihood based on regression in RKHS
         if parameters is None:
             parameters = {
                 "kfold": 10,  # 10 fold cross validation
@@ -121,17 +122,15 @@ def grasp(
             data=X, local_score_fun=local_score_cv_general, parameters=parameters
         )
 
-    elif (
-        score_func == "local_score_marginal_general"
-    ):  # negative marginal likelihood based on regression in RKHS
+    elif score_func == "local_score_marginal_general":
+        # negative marginal likelihood based on regression in RKHS
         parameters = {}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
 
-    elif (
-        score_func == "local_score_CV_multi"
-    ):  # k-fold negative cross validated likelihood based on regression in RKHS
+    elif score_func == "local_score_CV_multi": 
+        # k-fold negative cross validated likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
             parameters = {
@@ -145,9 +144,8 @@ def grasp(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
 
-    elif (
-        score_func == "local_score_marginal_multi"
-    ):  # negative marginal likelihood based on regression in RKHS
+    elif score_func == "local_score_marginal_multi":  
+        # negative marginal likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
             parameters = {"dlabel": {}}
@@ -157,7 +155,8 @@ def grasp(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
 
-    elif score_func == "local_score_BIC":  # Greedy equivalence search with BIC score
+    elif score_func == "local_score_BIC":  
+        # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
         parameters = {}
         parameters["lambda_value"] = 2
@@ -165,14 +164,16 @@ def grasp(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
 
-    elif score_func == "local_score_BIC_from_cov":  # Greedy equivalence search with BIC score
+    elif score_func == "local_score_BIC_from_cov":  
+        # SEM BIC score
         parameters = {}
         parameters["lambda_value"] = 2
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
 
-    elif score_func == "local_score_BDeu":  # Greedy equivalence search with BDeu score
+    elif score_func == "local_score_BDeu":  
+        # BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
         )
@@ -181,6 +182,8 @@ def grasp(
         raise Exception("Unknown function!")
     score = localScoreClass
 
+    gsts = [GST(i, score) for i in range(p)]
+
     node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names
     nodes = []
 
@@ -198,12 +201,15 @@ def grasp(
         y_parents = order.get_parents(y)
 
         candidates = [order.get(j) for j in range(0, i)]
+        # local_score = gsts[y].trace(candidates)
+
         grow(y, y_parents, candidates, score)
         local_score = shrink(y, y_parents, score)
+        
         order.set_local_score(y, local_score)
         order.bump_edges(len(y_parents))
 
-    while dfs(depth - 1, set(), [], order, score):
+    while dfs(depth - 1, set(), [], order, score, gsts):
         if verbose:
             sys.stdout.write("\rGRaSP edge count: %i    " % order.get_edges())
             sys.stdout.flush()
@@ -224,7 +230,7 @@ def grasp(
 
 
 # performs a dfs over covered tucks
-def dfs(depth: int, flipped: set, history: List[set], order, score):
+def dfs(depth: int, flipped: set, history: List[set], order, score, gsts):
 
     cache = [{}, {}, {}, 0]
 
@@ -252,7 +258,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score):
             cache[3] = order.get_edges()
 
             tuck(i, j, order)
-            edge_bump, score_bump = update(i, j, order, score)
+            edge_bump, score_bump = update(i, j, order, score, gsts)
 
             # because things that should be zero sometimes are not
             if score_bump > 1e-6:
@@ -271,7 +277,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score):
 
                 if len(flipped) > 0 and flipped not in history:
                     history.append(flipped)
-                    if depth > 0 and dfs(depth - 1, flipped, history, order, score):
+                    if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts):
                         return True
                     del history[-1]
 
@@ -286,7 +292,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score):
 
 
 # updates the parents and scores after a tuck
-def update(i: int, j: int, order, score):
+def update(i: int, j: int, order, score, gsts):
 
     edge_bump = 0
     old_score = 0
@@ -299,18 +305,21 @@ def update(i: int, j: int, order, score):
         edge_bump -= len(z_parents)
         old_score += order.get_local_score(z)
 
+        z_parents.clear()
         candidates = [order.get(l) for l in range(0, k)]
 
-        for w in [w for w in z_parents if w not in candidates]:
-            z_parents.remove(w)
-        shrink(z, z_parents, score)
+        # for w in [w for w in z_parents if w not in candidates]:
+        #     z_parents.remove(w)
+        # shrink(z, z_parents, score)
 
-        for w in z_parents:
-            candidates.remove(w)
+        # for w in z_parents:
+        #     candidates.remove(w)
 
-        grow(z, z_parents, candidates, score)
+        # local_score = gsts[z].trace(candidates, z_parents)
 
+        grow(z, z_parents, candidates, score)
         local_score = shrink(z, z_parents, score)
+
         order.set_local_score(z, local_score)
 
         edge_bump += len(z_parents)
@@ -323,6 +332,7 @@ def update(i: int, j: int, order, score):
 def grow(y: int, y_parents: List[int], candidates: List[int], score):
 
     best = -score.score(y, y_parents)
+    # best = -score.score_nocache(y, y_parents)
 
     add = None
     checked = []
@@ -340,6 +350,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score):
             x = candidates.pop()
             y_parents.append(x)
             current = -score.score(y, y_parents)
+            # current = -score.score_nocache(y, y_parents)
             y_parents.remove(x)
             checked.append(x)
 
@@ -354,6 +365,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score):
 def shrink(y: int, y_parents: List[int], score):
 
     best = -score.score(y, y_parents)
+    # best = -score.score_nocache(y, y_parents)
 
     remove = None
     checked = 0
@@ -367,6 +379,7 @@ def shrink(y: int, y_parents: List[int], score):
         while checked < len(y_parents):
             x = y_parents.pop(0)
             current = -score.score(y, y_parents)
+            # current = -score.score_nocache(y, y_parents)
             y_parents.append(x)
             checked += 1
 
diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py
new file mode 100644
index 00000000..25e0da6b
--- /dev/null
+++ b/causallearn/search/PermutationBased/gst.py
@@ -0,0 +1,77 @@
+class GSTNode:
+
+    def __init__(self, tree, add=None, score=None):
+        if score is None: score = -tree.score.score_nocache(tree.vertex, [])
+        # if score is None: score = -tree.score.score(tree.vertex, [])
+        self.tree = tree
+        self.add = add
+        self.grow_score = score
+        self.shrink_score = score
+        self.branches = None
+        self.remove = None
+
+    def __lt__(self, other):
+        return self.grow_score < other.grow_score
+
+    def grow(self, available, parents):
+        self.branches = []
+        for add in available:
+            parents.append(add)
+            score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+            # score = -self.tree.score.score(self.tree.vertex, parents)
+            parents.remove(add)
+            branch = GSTNode(self.tree, add, score)
+            if score > self.grow_score: self.branches.append(branch)
+        self.branches.sort()
+
+    def shrink(self, parents):
+        self.remove = []
+        while True:
+            best = None
+            for remove in [parent for parent in parents]:
+                parents.remove(remove)
+                score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+                # score = -self.tree.score.score(self.tree.vertex, parents)
+                parents.append(remove)
+                if score > self.shrink_score:
+                    self.shrink_score = score
+                    best = remove
+            if best is None: break
+            self.remove.append(best)
+            parents.remove(best)
+
+    def trace(self, prefix, available, parents):
+        if self.branches is None: self.grow(available, parents)
+        for branch in self.branches:
+            available.remove(branch.add)
+            if branch.add in prefix:
+                parents.append(branch.add)
+                return branch.trace(prefix, available, parents)
+        if self.remove is None:
+            self.shrink(parents)
+            return self.shrink_score
+        for remove in self.remove: parents.remove(remove)
+        return self.shrink_score
+
+
+class GST:
+
+    def __init__(self, vertex, score):
+        self.vertex = vertex
+        self.score = score
+        self.root = GSTNode(self)
+        self.forbidden = [vertex]
+        self.required = []
+
+    def trace(self, prefix, parents=None):
+        if parents is None: parents = []
+        available = [i for i in range(self.score.data.shape[1]) if i not in self.forbidden]
+        return self.root.trace(prefix, available, parents)
+
+    def set_knowledge(self, forbidden, required):
+        self.forbidden = forbidden  # not implemented
+        self.required = required  # not implemented
+        self.reset()
+
+    def reset(self):
+        self.root = GSTNode(self)
\ No newline at end of file
diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py
new file mode 100644
index 00000000..1d87d235
--- /dev/null
+++ b/tests/TestBOSS.py
@@ -0,0 +1,124 @@
+import unittest
+
+import numpy as np
+from causallearn.graph.AdjacencyConfusion import AdjacencyConfusion
+from causallearn.graph.ArrowConfusion import ArrowConfusion
+from causallearn.graph.GeneralGraph import GeneralGraph
+from causallearn.graph.GraphNode import GraphNode
+from causallearn.search.PermutationBased.BOSS import boss
+from causallearn.utils.DAG2CPDAG import dag2cpdag
+from causallearn.utils.GraphUtils import GraphUtils
+import matplotlib.image as mpimg
+import matplotlib.pyplot as plt
+import io
+
+
+def simulate_data(p, d, n):
+    """
+    Randomly generates an Erdos-Renyi direct acyclic graph given an ordering 
+    and randomly simulates data with the provided parameters.
+
+    p = |variables|
+    d = |edges| / |possible edges|
+    n = sample size
+    """
+
+    # npe = |possible edges|
+    pe = int(p * (p - 1) / 2)
+
+    # ne = |edges|
+    ne = int(d * pe)
+
+    # generate edges
+    e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne))
+    np.random.shuffle(e)
+    B = np.zeros([p, p])
+    B.T[np.triu_indices(p, 1)] = e
+
+    # generate variance terms
+    O = np.diag(np.ones(p))
+
+    # simulate data
+    X = np.random.multivariate_normal(np.zeros(p), O, n)
+    for i in range(p):
+        J = np.where(B[i])[0]
+        for j in J: X[:, i] += B[i, j] * X[:, j]
+
+    pi = [i for i in range(p)]
+    np.random.shuffle(pi)
+
+    return (B != 0)[pi][:, pi], X[:, pi]
+
+
+class TestBOSS(unittest.TestCase):
+    def test_boss(self):
+        ps = [30, 60]
+        # ps = [30]
+        ds = [0.1, 0.15]
+        n = 1000
+        reps = 5
+
+        for p in ps:
+            for d in ds:
+                stats = [[], [], [], []]
+                for rep in range(1, reps + 1):
+                    g0, X = simulate_data(p, d, n)
+                    print(
+                        "\nNodes:", p,
+                        "| Edges:", np.sum(g0),
+                        "| Avg Degree:", 2 * np.sum(g0) / p,
+                        "| Rep:", rep,
+                    )
+
+                    node_names = [("X%d" % (i + 1)) for i in range(p)]
+                    nodes = []
+
+                    for name in node_names:
+                        node = GraphNode(name)
+                        nodes.append(node)
+
+                    G0 = GeneralGraph(nodes)
+                    for y in range(p):
+                        for x in np.where(g0[y] == 1)[0]:
+                            G0.add_directed_edge(nodes[x], nodes[y])
+
+                    G0 = dag2cpdag(G0)
+
+                    G = boss(X)
+
+                    # pyd = GraphUtils.to_pydot(G)
+                    # tmp_png = pyd.create_png(f="png")
+                    # fp = io.BytesIO(tmp_png)
+                    # img = mpimg.imread(fp, format='png')
+                    # plt.axis('off')
+                    # plt.imshow(img)
+                    # plt.show()
+
+                    AdjC = AdjacencyConfusion(G0, G)
+                    stats[0].append(AdjC.get_adj_precision())
+                    stats[1].append(AdjC.get_adj_recall())
+
+                    ArrC = ArrowConfusion(G0, G)
+                    stats[2].append(ArrC.get_arrows_precision())
+                    stats[3].append(ArrC.get_arrows_recall())
+
+                    print(
+                        [
+                            ["AP", "AR", "AHP", "AHR"][i]
+                            + ": "
+                            + str(round(stats[i][-1], 2))
+                            for i in range(4)
+                        ]
+                    )
+
+                print(
+                    "\nOverall Stats: \n",
+                    [
+                        ["AP", "AR", "AHP", "AHR"][i]
+                        + ": "
+                        + str(round(np.sum(stats[i]) / reps, 2))
+                        for i in range(4)
+                    ],
+                )
+
+        print("finish")
diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py
index de44ae1e..1a8e0638 100644
--- a/tests/TestGRaSP.py
+++ b/tests/TestGRaSP.py
@@ -11,63 +11,50 @@
 import matplotlib.image as mpimg
 import matplotlib.pyplot as plt
 import io
+import random
 
 
-def random_dag(p, d):
+def simulate_data(p, d, n):
     """
-    Randomly generates an Erdos-Renyi direct acyclic graph given an ordering.
+    Randomly generates an Erdos-Renyi direct acyclic graph given an ordering 
+    and randomly simulates data with the provided parameters.
 
     p = |variables|
     d = |edges| / |possible edges|
+    n = sample size
     """
 
     # npe = |possible edges|
     pe = int(p * (p - 1) / 2)
 
-    # e = |edges|
+    # ne = |edges|
     ne = int(d * pe)
 
     # generate edges
-    e = np.append(np.zeros(pe - ne, "uint8"), np.ones(ne, "uint8"))
+    e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne))
     np.random.shuffle(e)
-
-    # generate graph
-    g = np.zeros([p, p], "uint8")
-    g.T[np.triu_indices(p, 1)] = e
-
-    return g
-
-
-def parameterize_dag(g):
-    """
-    Randomly parameterize a directed acyclic graph.
-
-    g = directed acyclic graph (adjacency matrix)
-    """
-
-    # p = |variables|
-    p = g.shape[0]
-
-    # e = |edges|
-    e = np.sum(g)
+    B = np.zeros([p, p])
+    B.T[np.triu_indices(p, 1)] = e
 
     # generate variance terms
-    o = np.diag(np.ones(p))
+    O = np.diag(np.ones(p))
 
-    # generate edge weights (edge parameters uniformly sampled [-1.0, 1.0])
-    b = np.zeros([p, p])
-    b[np.where(g == 1)] = np.random.uniform(-1, 1, e)
+    # simulate data
+    X = np.random.multivariate_normal(np.zeros(p), O, n)
+    for i in range(p):
+        J = np.where(B[i])[0]
+        for j in J: X[:, i] += B[i, j] * X[:, j]
 
-    # calculate covariance
-    s = np.dot(np.dot(np.linalg.inv(np.eye(p) - b), o), np.linalg.inv(np.eye(p) - b).T)
+    pi = [i for i in range(p)]
+    np.random.shuffle(pi)
 
-    return s
+    return (B != 0)[pi][:, pi], X[:, pi]
 
 
 class TestGRaSP(unittest.TestCase):
     def test_grasp(self):
-        # ps = [30, 60]
-        ps = [30]
+        ps = [30, 60]
+        # ps = [30]
         ds = [0.1, 0.15]
         n = 1000
         reps = 5
@@ -76,15 +63,13 @@ def test_grasp(self):
             for d in ds:
                 stats = [[], [], [], []]
                 for rep in range(1, reps + 1):
-                    g0 = random_dag(p, d)
+                    g0, X = simulate_data(p, d, n)
                     print(
                         "\nNodes:", p,
                         "| Edges:", np.sum(g0),
                         "| Avg Degree:", 2 * np.sum(g0) / p,
                         "| Rep:", rep,
                     )
-                    cov = parameterize_dag(g0)
-                    X = np.random.multivariate_normal(np.zeros(p), cov, n)
 
                     node_names = [("X%d" % (i + 1)) for i in range(p)]
                     nodes = []
@@ -100,7 +85,7 @@ def test_grasp(self):
 
                     G0 = dag2cpdag(G0)
 
-                    G = grasp(X)
+                    G = grasp(X, depth=1)
 
                     # pyd = GraphUtils.to_pydot(G)
                     # tmp_png = pyd.create_png(f="png")

From 58419ec0fa8b06b042ac0146a4ab97be51697b73 Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Wed, 17 Jul 2024 09:10:21 -0500
Subject: [PATCH 5/7] algorithms are working by efficiency is lackluster

---
 causallearn/search/PermutationBased/BOSS.py  |  8 ++++----
 causallearn/search/PermutationBased/GRaSP.py | 10 +++++-----
 causallearn/search/PermutationBased/gst.py   | 12 ++++++------
 tests/TestBOSS.py                            |  6 +++---
 tests/TestGRaSP.py                           |  7 +++----
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py
index b72d8f44..c77a6620 100644
--- a/causallearn/search/PermutationBased/BOSS.py
+++ b/causallearn/search/PermutationBased/BOSS.py
@@ -100,16 +100,16 @@ def boss(
     elif score_func == "local_score_BIC":  
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
-        parameters = {}
-        parameters["lambda_value"] = 2
+        if parameters is None:
+            parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
 
     elif score_func == "local_score_BIC_from_cov":  
         # SEM BIC score
-        parameters = {}
-        parameters["lambda_value"] = 2
+        if parameters is None:
+            parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index 09416b94..62bd7632 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -92,7 +92,7 @@ def grasp(
     ----------
     X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features.
     score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general',
-                    'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')).
+                    'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')).
     depth : allowed maximum depth for DFS
     parameters : when using CV likelihood,
                   parameters['kfold']: k-fold cross validation
@@ -158,16 +158,16 @@ def grasp(
     elif score_func == "local_score_BIC":  
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
-        parameters = {}
-        parameters["lambda_value"] = 2
+        if parameters is None:
+            parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
 
     elif score_func == "local_score_BIC_from_cov":  
         # SEM BIC score
-        parameters = {}
-        parameters["lambda_value"] = 2
+        if parameters is None:
+            parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py
index 25e0da6b..70d07903 100644
--- a/causallearn/search/PermutationBased/gst.py
+++ b/causallearn/search/PermutationBased/gst.py
@@ -1,8 +1,8 @@
 class GSTNode:
 
     def __init__(self, tree, add=None, score=None):
-        if score is None: score = -tree.score.score_nocache(tree.vertex, [])
-        # if score is None: score = -tree.score.score(tree.vertex, [])
+        # if score is None: score = -tree.score.score_nocache(tree.vertex, [])
+        if score is None: score = -tree.score.score(tree.vertex, [])
         self.tree = tree
         self.add = add
         self.grow_score = score
@@ -17,8 +17,8 @@ def grow(self, available, parents):
         self.branches = []
         for add in available:
             parents.append(add)
-            score = -self.tree.score.score_nocache(self.tree.vertex, parents)
-            # score = -self.tree.score.score(self.tree.vertex, parents)
+            # score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+            score = -self.tree.score.score(self.tree.vertex, parents)
             parents.remove(add)
             branch = GSTNode(self.tree, add, score)
             if score > self.grow_score: self.branches.append(branch)
@@ -30,8 +30,8 @@ def shrink(self, parents):
             best = None
             for remove in [parent for parent in parents]:
                 parents.remove(remove)
-                score = -self.tree.score.score_nocache(self.tree.vertex, parents)
-                # score = -self.tree.score.score(self.tree.vertex, parents)
+                # score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+                score = -self.tree.score.score(self.tree.vertex, parents)
                 parents.append(remove)
                 if score > self.shrink_score:
                     self.shrink_score = score
diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py
index 1d87d235..f67fa5a3 100644
--- a/tests/TestBOSS.py
+++ b/tests/TestBOSS.py
@@ -30,7 +30,7 @@ def simulate_data(p, d, n):
     ne = int(d * pe)
 
     # generate edges
-    e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne))
+    e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne))
     np.random.shuffle(e)
     B = np.zeros([p, p])
     B.T[np.triu_indices(p, 1)] = e
@@ -66,7 +66,7 @@ def test_boss(self):
                     print(
                         "\nNodes:", p,
                         "| Edges:", np.sum(g0),
-                        "| Avg Degree:", 2 * np.sum(g0) / p,
+                        "| Avg Degree:", 2 * round(np.sum(g0) / p, 2),
                         "| Rep:", rep,
                     )
 
@@ -84,7 +84,7 @@ def test_boss(self):
 
                     G0 = dag2cpdag(G0)
 
-                    G = boss(X)
+                    G = boss(X, parameters={'lambda_value': 2})
 
                     # pyd = GraphUtils.to_pydot(G)
                     # tmp_png = pyd.create_png(f="png")
diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py
index 1a8e0638..0ac1d81d 100644
--- a/tests/TestGRaSP.py
+++ b/tests/TestGRaSP.py
@@ -11,7 +11,6 @@
 import matplotlib.image as mpimg
 import matplotlib.pyplot as plt
 import io
-import random
 
 
 def simulate_data(p, d, n):
@@ -31,7 +30,7 @@ def simulate_data(p, d, n):
     ne = int(d * pe)
 
     # generate edges
-    e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne))
+    e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne))
     np.random.shuffle(e)
     B = np.zeros([p, p])
     B.T[np.triu_indices(p, 1)] = e
@@ -67,7 +66,7 @@ def test_grasp(self):
                     print(
                         "\nNodes:", p,
                         "| Edges:", np.sum(g0),
-                        "| Avg Degree:", 2 * np.sum(g0) / p,
+                        "| Avg Degree:", 2 * round(np.sum(g0) / p, 2),
                         "| Rep:", rep,
                     )
 
@@ -85,7 +84,7 @@ def test_grasp(self):
 
                     G0 = dag2cpdag(G0)
 
-                    G = grasp(X, depth=1)
+                    G = grasp(X, depth=1, parameters={'lambda_value': 2})
 
                     # pyd = GraphUtils.to_pydot(G)
                     # tmp_png = pyd.create_png(f="png")

From 9ea1feddf57954d39f22e79fc57161628d8d682b Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Thu, 18 Jul 2024 13:01:03 -0500
Subject: [PATCH 6/7] final updates to BOSS and GRaSP

---
 causallearn/search/PermutationBased/BOSS.py  |  11 +-
 causallearn/search/PermutationBased/GRaSP.py | 103 ++-----------------
 causallearn/search/PermutationBased/gst.py   |  12 +--
 tests/TestBOSS.py                            |  21 ++--
 tests/TestGRaSP.py                           |  21 ++--
 5 files changed, 31 insertions(+), 137 deletions(-)

diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py
index c77a6620..465d3659 100644
--- a/causallearn/search/PermutationBased/BOSS.py
+++ b/causallearn/search/PermutationBased/BOSS.py
@@ -48,11 +48,11 @@ def boss(
     G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j.
     """
 
+    X = X.copy()
     n, p = X.shape
     if n < p:
         warnings.warn("The number of features is much larger than the sample size!")
 
-    X = np.mat(X)
     if score_func == "local_score_CV_general":  
         # % k-fold negative cross validated likelihood based on regression in RKHS
         if parameters is None:
@@ -63,14 +63,12 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_general, parameters=parameters
         )
-
     elif score_func == "local_score_marginal_general":  
         # negative marginal likelihood based on regression in RKHS
         parameters = {}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
-
     elif score_func == "local_score_CV_multi":  
         # k-fold negative cross validated likelihood based on regression in RKHS
         # for data with multi-variate dimensions
@@ -85,7 +83,6 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
-
     elif score_func == "local_score_marginal_multi":  
         # negative marginal likelihood based on regression in RKHS
         # for data with multi-variate dimensions
@@ -96,7 +93,6 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
-
     elif score_func == "local_score_BIC":  
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
@@ -105,7 +101,6 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
-
     elif score_func == "local_score_BIC_from_cov":  
         # SEM BIC score
         if parameters is None:
@@ -113,17 +108,15 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
-
     elif score_func == "local_score_BDeu":  
         # BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
         )
-
     else:
         raise Exception("Unknown function!")
+    
     score = localScoreClass
-
     gsts = [GST(i, score) for i in range(p)]
 
     node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names
diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index 62bd7632..f4d4aa14 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -106,11 +106,11 @@ def grasp(
     G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j.
     """
 
+    X = X.copy()
     n, p = X.shape
     if n < p:
         warnings.warn("The number of features is much larger than the sample size!")
 
-    X = np.mat(X)
     if score_func == "local_score_CV_general":  
         # k-fold negative cross validated likelihood based on regression in RKHS
         if parameters is None:
@@ -121,14 +121,12 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_general, parameters=parameters
         )
-
     elif score_func == "local_score_marginal_general":
         # negative marginal likelihood based on regression in RKHS
         parameters = {}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
-
     elif score_func == "local_score_CV_multi": 
         # k-fold negative cross validated likelihood based on regression in RKHS
         # for data with multi-variate dimensions
@@ -143,7 +141,6 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
-
     elif score_func == "local_score_marginal_multi":  
         # negative marginal likelihood based on regression in RKHS
         # for data with multi-variate dimensions
@@ -154,7 +151,6 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
-
     elif score_func == "local_score_BIC":  
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
@@ -163,7 +159,6 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
-
     elif score_func == "local_score_BIC_from_cov":  
         # SEM BIC score
         if parameters is None:
@@ -171,17 +166,15 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
-
     elif score_func == "local_score_BDeu":  
         # BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
         )
-
     else:
         raise Exception("Unknown function!")
-    score = localScoreClass
 
+    score = localScoreClass
     gsts = [GST(i, score) for i in range(p)]
 
     node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names
@@ -201,15 +194,11 @@ def grasp(
         y_parents = order.get_parents(y)
 
         candidates = [order.get(j) for j in range(0, i)]
-        # local_score = gsts[y].trace(candidates)
-
-        grow(y, y_parents, candidates, score)
-        local_score = shrink(y, y_parents, score)
-        
+        local_score = gsts[y].trace(candidates, y_parents)
         order.set_local_score(y, local_score)
         order.bump_edges(len(y_parents))
 
-    while dfs(depth - 1, set(), [], order, score, gsts):
+    while dfs(depth - 1, set(), [], order, gsts):
         if verbose:
             sys.stdout.write("\rGRaSP edge count: %i    " % order.get_edges())
             sys.stdout.flush()
@@ -230,7 +219,7 @@ def grasp(
 
 
 # performs a dfs over covered tucks
-def dfs(depth: int, flipped: set, history: List[set], order, score, gsts):
+def dfs(depth: int, flipped: set, history: List[set], order, gsts):
 
     cache = [{}, {}, {}, 0]
 
@@ -258,7 +247,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts):
             cache[3] = order.get_edges()
 
             tuck(i, j, order)
-            edge_bump, score_bump = update(i, j, order, score, gsts)
+            edge_bump, score_bump = update(i, j, order, gsts)
 
             # because things that should be zero sometimes are not
             if score_bump > 1e-6:
@@ -277,7 +266,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts):
 
                 if len(flipped) > 0 and flipped not in history:
                     history.append(flipped)
-                    if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts):
+                    if depth > 0 and dfs(depth - 1, flipped, history, order, gsts):
                         return True
                     del history[-1]
 
@@ -292,7 +281,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts):
 
 
 # updates the parents and scores after a tuck
-def update(i: int, j: int, order, score, gsts):
+def update(i: int, j: int, order, gsts):
 
     edge_bump = 0
     old_score = 0
@@ -307,19 +296,7 @@ def update(i: int, j: int, order, score, gsts):
 
         z_parents.clear()
         candidates = [order.get(l) for l in range(0, k)]
-
-        # for w in [w for w in z_parents if w not in candidates]:
-        #     z_parents.remove(w)
-        # shrink(z, z_parents, score)
-
-        # for w in z_parents:
-        #     candidates.remove(w)
-
-        # local_score = gsts[z].trace(candidates, z_parents)
-
-        grow(z, z_parents, candidates, score)
-        local_score = shrink(z, z_parents, score)
-
+        local_score = gsts[z].trace(candidates, z_parents)
         order.set_local_score(z, local_score)
 
         edge_bump += len(z_parents)
@@ -328,68 +305,6 @@ def update(i: int, j: int, order, score, gsts):
     return edge_bump, new_score - old_score
 
 
-# grow of grow-shrink
-def grow(y: int, y_parents: List[int], candidates: List[int], score):
-
-    best = -score.score(y, y_parents)
-    # best = -score.score_nocache(y, y_parents)
-
-    add = None
-    checked = []
-    while add is not None or len(candidates) > 0:
-
-        if add is not None:
-            checked.remove(add)
-            y_parents.append(add)
-            candidates = checked
-            checked = []
-            add = None
-
-        while len(candidates) > 0:
-
-            x = candidates.pop()
-            y_parents.append(x)
-            current = -score.score(y, y_parents)
-            # current = -score.score_nocache(y, y_parents)
-            y_parents.remove(x)
-            checked.append(x)
-
-            if current > best:
-                best = current
-                add = x
-
-    return best
-
-
-# shrink of grow-shrink
-def shrink(y: int, y_parents: List[int], score):
-
-    best = -score.score(y, y_parents)
-    # best = -score.score_nocache(y, y_parents)
-
-    remove = None
-    checked = 0
-    while remove is not None or checked < len(y_parents):
-
-        if remove is not None:
-            y_parents.remove(remove)
-            checked = 0
-            remove = None
-
-        while checked < len(y_parents):
-            x = y_parents.pop(0)
-            current = -score.score(y, y_parents)
-            # current = -score.score_nocache(y, y_parents)
-            y_parents.append(x)
-            checked += 1
-
-            if current > best:
-                best = current
-                remove = x
-
-    return best
-
-
 # tucks the node at position i into position j
 def tuck(i: int, j: int, order):
 
diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py
index 70d07903..25e0da6b 100644
--- a/causallearn/search/PermutationBased/gst.py
+++ b/causallearn/search/PermutationBased/gst.py
@@ -1,8 +1,8 @@
 class GSTNode:
 
     def __init__(self, tree, add=None, score=None):
-        # if score is None: score = -tree.score.score_nocache(tree.vertex, [])
-        if score is None: score = -tree.score.score(tree.vertex, [])
+        if score is None: score = -tree.score.score_nocache(tree.vertex, [])
+        # if score is None: score = -tree.score.score(tree.vertex, [])
         self.tree = tree
         self.add = add
         self.grow_score = score
@@ -17,8 +17,8 @@ def grow(self, available, parents):
         self.branches = []
         for add in available:
             parents.append(add)
-            # score = -self.tree.score.score_nocache(self.tree.vertex, parents)
-            score = -self.tree.score.score(self.tree.vertex, parents)
+            score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+            # score = -self.tree.score.score(self.tree.vertex, parents)
             parents.remove(add)
             branch = GSTNode(self.tree, add, score)
             if score > self.grow_score: self.branches.append(branch)
@@ -30,8 +30,8 @@ def shrink(self, parents):
             best = None
             for remove in [parent for parent in parents]:
                 parents.remove(remove)
-                # score = -self.tree.score.score_nocache(self.tree.vertex, parents)
-                score = -self.tree.score.score(self.tree.vertex, parents)
+                score = -self.tree.score.score_nocache(self.tree.vertex, parents)
+                # score = -self.tree.score.score(self.tree.vertex, parents)
                 parents.append(remove)
                 if score > self.shrink_score:
                     self.shrink_score = score
diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py
index f67fa5a3..ea963d47 100644
--- a/tests/TestBOSS.py
+++ b/tests/TestBOSS.py
@@ -7,10 +7,8 @@
 from causallearn.graph.GraphNode import GraphNode
 from causallearn.search.PermutationBased.BOSS import boss
 from causallearn.utils.DAG2CPDAG import dag2cpdag
-from causallearn.utils.GraphUtils import GraphUtils
-import matplotlib.image as mpimg
-import matplotlib.pyplot as plt
-import io
+
+import gc
 
 
 def simulate_data(p, d, n):
@@ -53,11 +51,13 @@ def simulate_data(p, d, n):
 class TestBOSS(unittest.TestCase):
     def test_boss(self):
         ps = [30, 60]
-        # ps = [30]
         ds = [0.1, 0.15]
         n = 1000
         reps = 5
 
+        gc.set_threshold(20000, 50, 50)
+        # gc.set_debug(gc.DEBUG_STATS)
+
         for p in ps:
             for d in ds:
                 stats = [[], [], [], []]
@@ -84,15 +84,8 @@ def test_boss(self):
 
                     G0 = dag2cpdag(G0)
 
-                    G = boss(X, parameters={'lambda_value': 2})
-
-                    # pyd = GraphUtils.to_pydot(G)
-                    # tmp_png = pyd.create_png(f="png")
-                    # fp = io.BytesIO(tmp_png)
-                    # img = mpimg.imread(fp, format='png')
-                    # plt.axis('off')
-                    # plt.imshow(img)
-                    # plt.show()
+                    G = boss(X, parameters={'lambda_value': 4})
+                    gc.collect()
 
                     AdjC = AdjacencyConfusion(G0, G)
                     stats[0].append(AdjC.get_adj_precision())
diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py
index 0ac1d81d..f1fe0dd0 100644
--- a/tests/TestGRaSP.py
+++ b/tests/TestGRaSP.py
@@ -7,10 +7,8 @@
 from causallearn.graph.GraphNode import GraphNode
 from causallearn.search.PermutationBased.GRaSP import grasp
 from causallearn.utils.DAG2CPDAG import dag2cpdag
-from causallearn.utils.GraphUtils import GraphUtils
-import matplotlib.image as mpimg
-import matplotlib.pyplot as plt
-import io
+
+import gc
 
 
 def simulate_data(p, d, n):
@@ -53,11 +51,13 @@ def simulate_data(p, d, n):
 class TestGRaSP(unittest.TestCase):
     def test_grasp(self):
         ps = [30, 60]
-        # ps = [30]
         ds = [0.1, 0.15]
         n = 1000
         reps = 5
 
+        gc.set_threshold(20000, 50, 50)
+        # gc.set_debug(gc.DEBUG_STATS)
+
         for p in ps:
             for d in ds:
                 stats = [[], [], [], []]
@@ -84,15 +84,8 @@ def test_grasp(self):
 
                     G0 = dag2cpdag(G0)
 
-                    G = grasp(X, depth=1, parameters={'lambda_value': 2})
-
-                    # pyd = GraphUtils.to_pydot(G)
-                    # tmp_png = pyd.create_png(f="png")
-                    # fp = io.BytesIO(tmp_png)
-                    # img = mpimg.imread(fp, format='png')
-                    # plt.axis('off')
-                    # plt.imshow(img)
-                    # plt.show()
+                    G = grasp(X, depth=1, parameters={'lambda_value': 4})
+                    gc.collect()
 
                     AdjC = AdjacencyConfusion(G0, G)
                     stats[0].append(AdjC.get_adj_precision())

From 53995155e4be2ee400ad6594630cdd7c08ea2ed1 Mon Sep 17 00:00:00 2001
From: Bryan Andrews <bja43@pitt.edu>
Date: Thu, 18 Jul 2024 13:01:54 -0500
Subject: [PATCH 7/7] final updates

---
 causallearn/score/LocalScoreFunction.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py
index e02ba737..a4af5361 100644
--- a/causallearn/score/LocalScoreFunction.py
+++ b/causallearn/score/LocalScoreFunction.py
@@ -34,9 +34,9 @@ def local_score_BIC(Data: ndarray, i: int, PAi: List[int], parameters=None) -> f
     if len(PAi) == 0:
         return n * np.log(cov[i, i])
 
-    yX = np.mat(cov[np.ix_([i], PAi)])
-    XX = np.mat(cov[np.ix_(PAi, PAi)])
-    H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T)
+    yX = cov[np.ix_([i], PAi)]
+    XX = cov[np.ix_(PAi, PAi)]
+    H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T)
 
     return n * H + np.log(n) * len(PAi) * lambda_value
 
@@ -68,9 +68,9 @@ def local_score_BIC_from_cov(
     if len(PAi) == 0:
         return n * np.log(cov[i, i])
 
-    yX = np.mat(cov[np.ix_([i], PAi)])
-    XX = np.mat(cov[np.ix_(PAi, PAi)])
-    H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T)
+    yX = cov[np.ix_([i], PAi)]
+    XX = cov[np.ix_(PAi, PAi)]
+    H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T)
 
     return n * H + np.log(n) * len(PAi) * lambda_value