From e039b27aefdb7bd936c463d2d7e97366f3a12f46 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Sun, 14 Jul 2024 15:54:14 -0500 Subject: [PATCH 1/7] introduced a way to use the local score object without caching --- causallearn/score/LocalScoreFunctionClass.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/causallearn/score/LocalScoreFunctionClass.py b/causallearn/score/LocalScoreFunctionClass.py index 41ffb49f..8171d515 100644 --- a/causallearn/score/LocalScoreFunctionClass.py +++ b/causallearn/score/LocalScoreFunctionClass.py @@ -46,3 +46,9 @@ def score(self, i: int, PAi: List[int]) -> float: self.score_cache[i][hash_key] = self.local_score_fun(self.data, i, PAi, self.parameters) return self.score_cache[i][hash_key] + + def score_nocache(self, i: int, PAi: List[int]) -> float: + if self.local_score_fun == local_score_BIC_from_cov: + return self.local_score_fun((self.cov, self.n), i, PAi, self.parameters) + else: + return self.local_score_fun(self.data, i, PAi, self.parameters) \ No newline at end of file From 54e752b587d3e59fd879322b962da15c0a439890 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Mon, 15 Jul 2024 15:31:02 -0500 Subject: [PATCH 2/7] removing maxP because it dodnt do anything --- causallearn/search/PermutationBased/GRaSP.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index abcbf47c..6fcf2121 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -78,7 +78,6 @@ def grasp( X: np.ndarray, score_func: str = "local_score_BIC", depth: Optional[int] = 3, - maxP: Optional[float] = None, parameters: Optional[Dict[str, Any]] = None, verbose: Optional[bool] = True, node_names: Optional[List[str]] = None, @@ -92,7 +91,6 @@ def grasp( score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')). depth : allowed maximum depth for DFS - maxP : allowed maximum number of parents when searching the graph parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation parameters['lambda']: regularization parameter @@ -118,8 +116,6 @@ def grasp( "kfold": 10, # 10 fold cross validation "lambda": 0.01, } # regularization parameter - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) @@ -128,8 +124,6 @@ def grasp( score_func == "local_score_marginal_general" ): # negative marginal likelihood based on regression in RKHS parameters = {} - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) @@ -146,8 +140,6 @@ def grasp( } # regularization parameter for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) @@ -160,8 +152,6 @@ def grasp( parameters = {"dlabel": {}} for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) @@ -169,15 +159,11 @@ def grasp( elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score parameters = {} parameters["lambda_value"] = 2 - if maxP is None: - maxP = X.shape[1] / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score - if maxP is None: - maxP = X.shape[1] / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) From 76f7956f42f6817f0ad760989e154231650b5e1d Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Tue, 16 Jul 2024 06:50:24 -0500 Subject: [PATCH 3/7] fixed grasp unittest --- causallearn/search/PermutationBased/GRaSP.py | 11 ++++++- tests/TestGRaSP.py | 31 +++++++++----------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 6fcf2121..fcff6c0d 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -10,6 +10,7 @@ from causallearn.score.LocalScoreFunction import ( local_score_BDeu, local_score_BIC, + local_score_BIC_from_cov, local_score_cv_general, local_score_cv_multi, local_score_marginal_general, @@ -76,7 +77,7 @@ def len(self): def grasp( X: np.ndarray, - score_func: str = "local_score_BIC", + score_func: str = "local_score_BIC_from_cov", depth: Optional[int] = 3, parameters: Optional[Dict[str, Any]] = None, verbose: Optional[bool] = True, @@ -157,12 +158,20 @@ def grasp( ) elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") parameters = {} parameters["lambda_value"] = 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) + elif score_func == "local_score_BIC_from_cov": # Greedy equivalence search with BIC score + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 7482dcee..de44ae1e 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -66,7 +66,8 @@ def parameterize_dag(g): class TestGRaSP(unittest.TestCase): def test_grasp(self): - ps = [30, 60] + # ps = [30, 60] + ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 @@ -77,19 +78,15 @@ def test_grasp(self): for rep in range(1, reps + 1): g0 = random_dag(p, d) print( - "\nNodes:", - p, - "| Edges:", - np.sum(g0), - "| Avg Degree:", - 2 * np.sum(g0) / p, - "| Rep:", - rep, + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * np.sum(g0) / p, + "| Rep:", rep, ) cov = parameterize_dag(g0) X = np.random.multivariate_normal(np.zeros(p), cov, n) - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] nodes = [] for name in node_names: @@ -105,13 +102,13 @@ def test_grasp(self): G = grasp(X) - pyd = GraphUtils.to_pydot(G) - tmp_png = pyd.create_png(f="png") - fp = io.BytesIO(tmp_png) - img = mpimg.imread(fp, format='png') - plt.axis('off') - plt.imshow(img) - plt.show() + # pyd = GraphUtils.to_pydot(G) + # tmp_png = pyd.create_png(f="png") + # fp = io.BytesIO(tmp_png) + # img = mpimg.imread(fp, format='png') + # plt.axis('off') + # plt.imshow(img) + # plt.show() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) From cebe49b8c25a8234c877ca06e6d1a8debddfa320 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Tue, 16 Jul 2024 20:27:18 -0500 Subject: [PATCH 4/7] adding gsts and boss --- causallearn/search/PermutationBased/BOSS.py | 214 +++++++++++++++++++ causallearn/search/PermutationBased/GRaSP.py | 65 +++--- causallearn/search/PermutationBased/gst.py | 77 +++++++ tests/TestBOSS.py | 124 +++++++++++ tests/TestGRaSP.py | 59 ++--- 5 files changed, 476 insertions(+), 63 deletions(-) create mode 100644 causallearn/search/PermutationBased/BOSS.py create mode 100644 causallearn/search/PermutationBased/gst.py create mode 100644 tests/TestBOSS.py diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py new file mode 100644 index 00000000..b72d8f44 --- /dev/null +++ b/causallearn/search/PermutationBased/BOSS.py @@ -0,0 +1,214 @@ +import random +import sys +import time +import warnings +from typing import Any, Dict, List, Optional + +import numpy as np +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.score.LocalScoreFunction import ( + local_score_BDeu, + local_score_BIC, + local_score_BIC_from_cov, + local_score_cv_general, + local_score_cv_multi, + local_score_marginal_general, + local_score_marginal_multi, +) +from causallearn.search.PermutationBased.gst import GST; +from causallearn.score.LocalScoreFunctionClass import LocalScoreClass +from causallearn.utils.DAG2CPDAG import dag2cpdag + + +def boss( + X: np.ndarray, + score_func: str = "local_score_BIC_from_cov", + parameters: Optional[Dict[str, Any]] = None, + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: + """ + Perform a best order score search (BOSS) algorithm + + Parameters + ---------- + X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. + score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). + parameters : when using CV likelihood, + parameters['kfold']: k-fold cross validation + parameters['lambda']: regularization parameter + parameters['dlabel']: for variables with multi-dimensions, + indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. + + Returns + ------- + G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. + """ + + n, p = X.shape + if n < p: + warnings.warn("The number of features is much larger than the sample size!") + + X = np.mat(X) + if score_func == "local_score_CV_general": + # % k-fold negative cross validated likelihood based on regression in RKHS + if parameters is None: + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_general, parameters=parameters + ) + + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS + parameters = {} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_general, parameters=parameters + ) + + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_multi, parameters=parameters + ) + + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = {"dlabel": {}} + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_multi, parameters=parameters + ) + + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC, parameters=parameters + ) + + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + + elif score_func == "local_score_BDeu": + # BDeu score + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BDeu, parameters=None + ) + + else: + raise Exception("Unknown function!") + score = localScoreClass + + gsts = [GST(i, score) for i in range(p)] + + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G = GeneralGraph(nodes) + + runtime = time.perf_counter() + + order = [v for v in range(p)] + + gsts = [GST(v, score) for v in order] + parents = {v: [] for v in order} + + variables = [v for v in order] + while True: + improved = False + random.shuffle(variables) + if verbose: + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + sys.stdout.write("\rBOSS edge count: %i " % np.sum([len(parents[v]) for v in range(p)])) + sys.stdout.flush() + + for v in variables: + improved |= better_mutation(v, order, gsts) + if not improved: break + + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + + runtime = time.perf_counter() - runtime + + if verbose: + sys.stdout.write("\nBOSS completed in: %.2fs \n" % runtime) + sys.stdout.flush() + + for y in range(p): + for x in parents[y]: + G.add_directed_edge(nodes[x], nodes[y]) + + G = dag2cpdag(G) + + return G + + +def reversed_enumerate(iter, j): + for w in reversed(iter): + yield j, w + j -= 1 + + +def better_mutation(v, order, gsts): + i = order.index(v) + p = len(order) + scores = np.zeros(p + 1) + + prefix = [] + score = 0 + for j, w in enumerate(order): + scores[j] = gsts[v].trace(prefix) + score + if v != w: + score += gsts[w].trace(prefix) + prefix.append(w) + + scores[p] = gsts[v].trace(prefix) + score + best = p + + prefix.append(v) + score = 0 + for j, w in reversed_enumerate(order, p - 1): + if v != w: + prefix.remove(w) + score += gsts[w].trace(prefix) + scores[j] += score + if scores[j] > scores[best]: best = j + + if scores[i] + 1e-6 > scores[best]: return False + order.remove(v) + order.insert(best - int(best > i), v) + + return True \ No newline at end of file diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index fcff6c0d..09416b94 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -16,6 +16,7 @@ local_score_marginal_general, local_score_marginal_multi, ) +from causallearn.search.PermutationBased.gst import GST; from causallearn.score.LocalScoreFunctionClass import LocalScoreClass from causallearn.utils.DAG2CPDAG import dag2cpdag @@ -34,6 +35,7 @@ def __init__(self, p, score): y = self.order[i] self.parents[y] = [] self.local_scores[y] = -score.score(y, []) + # self.local_scores[y] = -score.score_nocache(y, []) def get(self, i): return self.order[i] @@ -109,9 +111,8 @@ def grasp( warnings.warn("The number of features is much larger than the sample size!") X = np.mat(X) - if ( - score_func == "local_score_CV_general" - ): # % k-fold negative cross validated likelihood based on regression in RKHS + if score_func == "local_score_CV_general": + # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: parameters = { "kfold": 10, # 10 fold cross validation @@ -121,17 +122,15 @@ def grasp( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif ( - score_func == "local_score_marginal_general" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif ( - score_func == "local_score_CV_multi" - ): # k-fold negative cross validated likelihood based on regression in RKHS + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = { @@ -145,9 +144,8 @@ def grasp( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif ( - score_func == "local_score_marginal_multi" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = {"dlabel": {}} @@ -157,7 +155,8 @@ def grasp( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score + elif score_func == "local_score_BIC": + # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") parameters = {} parameters["lambda_value"] = 2 @@ -165,14 +164,16 @@ def grasp( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # Greedy equivalence search with BIC score + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score parameters = {} parameters["lambda_value"] = 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score + elif score_func == "local_score_BDeu": + # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) @@ -181,6 +182,8 @@ def grasp( raise Exception("Unknown function!") score = localScoreClass + gsts = [GST(i, score) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names nodes = [] @@ -198,12 +201,15 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] + # local_score = gsts[y].trace(candidates) + grow(y, y_parents, candidates, score) local_score = shrink(y, y_parents, score) + order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score): + while dfs(depth - 1, set(), [], order, score, gsts): if verbose: sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) sys.stdout.flush() @@ -224,7 +230,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score): +def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): cache = [{}, {}, {}, 0] @@ -252,7 +258,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score) + edge_bump, score_bump = update(i, j, order, score, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -271,7 +277,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score): + if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts): return True del history[-1] @@ -286,7 +292,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score): +def update(i: int, j: int, order, score, gsts): edge_bump = 0 old_score = 0 @@ -299,18 +305,21 @@ def update(i: int, j: int, order, score): edge_bump -= len(z_parents) old_score += order.get_local_score(z) + z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - for w in [w for w in z_parents if w not in candidates]: - z_parents.remove(w) - shrink(z, z_parents, score) + # for w in [w for w in z_parents if w not in candidates]: + # z_parents.remove(w) + # shrink(z, z_parents, score) - for w in z_parents: - candidates.remove(w) + # for w in z_parents: + # candidates.remove(w) - grow(z, z_parents, candidates, score) + # local_score = gsts[z].trace(candidates, z_parents) + grow(z, z_parents, candidates, score) local_score = shrink(z, z_parents, score) + order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -323,6 +332,7 @@ def update(i: int, j: int, order, score): def grow(y: int, y_parents: List[int], candidates: List[int], score): best = -score.score(y, y_parents) + # best = -score.score_nocache(y, y_parents) add = None checked = [] @@ -340,6 +350,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score): x = candidates.pop() y_parents.append(x) current = -score.score(y, y_parents) + # current = -score.score_nocache(y, y_parents) y_parents.remove(x) checked.append(x) @@ -354,6 +365,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score): def shrink(y: int, y_parents: List[int], score): best = -score.score(y, y_parents) + # best = -score.score_nocache(y, y_parents) remove = None checked = 0 @@ -367,6 +379,7 @@ def shrink(y: int, y_parents: List[int], score): while checked < len(y_parents): x = y_parents.pop(0) current = -score.score(y, y_parents) + # current = -score.score_nocache(y, y_parents) y_parents.append(x) checked += 1 diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py new file mode 100644 index 00000000..25e0da6b --- /dev/null +++ b/causallearn/search/PermutationBased/gst.py @@ -0,0 +1,77 @@ +class GSTNode: + + def __init__(self, tree, add=None, score=None): + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) + self.tree = tree + self.add = add + self.grow_score = score + self.shrink_score = score + self.branches = None + self.remove = None + + def __lt__(self, other): + return self.grow_score < other.grow_score + + def grow(self, available, parents): + self.branches = [] + for add in available: + parents.append(add) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.remove(add) + branch = GSTNode(self.tree, add, score) + if score > self.grow_score: self.branches.append(branch) + self.branches.sort() + + def shrink(self, parents): + self.remove = [] + while True: + best = None + for remove in [parent for parent in parents]: + parents.remove(remove) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.append(remove) + if score > self.shrink_score: + self.shrink_score = score + best = remove + if best is None: break + self.remove.append(best) + parents.remove(best) + + def trace(self, prefix, available, parents): + if self.branches is None: self.grow(available, parents) + for branch in self.branches: + available.remove(branch.add) + if branch.add in prefix: + parents.append(branch.add) + return branch.trace(prefix, available, parents) + if self.remove is None: + self.shrink(parents) + return self.shrink_score + for remove in self.remove: parents.remove(remove) + return self.shrink_score + + +class GST: + + def __init__(self, vertex, score): + self.vertex = vertex + self.score = score + self.root = GSTNode(self) + self.forbidden = [vertex] + self.required = [] + + def trace(self, prefix, parents=None): + if parents is None: parents = [] + available = [i for i in range(self.score.data.shape[1]) if i not in self.forbidden] + return self.root.trace(prefix, available, parents) + + def set_knowledge(self, forbidden, required): + self.forbidden = forbidden # not implemented + self.required = required # not implemented + self.reset() + + def reset(self): + self.root = GSTNode(self) \ No newline at end of file diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py new file mode 100644 index 00000000..1d87d235 --- /dev/null +++ b/tests/TestBOSS.py @@ -0,0 +1,124 @@ +import unittest + +import numpy as np +from causallearn.graph.AdjacencyConfusion import AdjacencyConfusion +from causallearn.graph.ArrowConfusion import ArrowConfusion +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.search.PermutationBased.BOSS import boss +from causallearn.utils.DAG2CPDAG import dag2cpdag +from causallearn.utils.GraphUtils import GraphUtils +import matplotlib.image as mpimg +import matplotlib.pyplot as plt +import io + + +def simulate_data(p, d, n): + """ + Randomly generates an Erdos-Renyi direct acyclic graph given an ordering + and randomly simulates data with the provided parameters. + + p = |variables| + d = |edges| / |possible edges| + n = sample size + """ + + # npe = |possible edges| + pe = int(p * (p - 1) / 2) + + # ne = |edges| + ne = int(d * pe) + + # generate edges + e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + np.random.shuffle(e) + B = np.zeros([p, p]) + B.T[np.triu_indices(p, 1)] = e + + # generate variance terms + O = np.diag(np.ones(p)) + + # simulate data + X = np.random.multivariate_normal(np.zeros(p), O, n) + for i in range(p): + J = np.where(B[i])[0] + for j in J: X[:, i] += B[i, j] * X[:, j] + + pi = [i for i in range(p)] + np.random.shuffle(pi) + + return (B != 0)[pi][:, pi], X[:, pi] + + +class TestBOSS(unittest.TestCase): + def test_boss(self): + ps = [30, 60] + # ps = [30] + ds = [0.1, 0.15] + n = 1000 + reps = 5 + + for p in ps: + for d in ds: + stats = [[], [], [], []] + for rep in range(1, reps + 1): + g0, X = simulate_data(p, d, n) + print( + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * np.sum(g0) / p, + "| Rep:", rep, + ) + + node_names = [("X%d" % (i + 1)) for i in range(p)] + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G0 = GeneralGraph(nodes) + for y in range(p): + for x in np.where(g0[y] == 1)[0]: + G0.add_directed_edge(nodes[x], nodes[y]) + + G0 = dag2cpdag(G0) + + G = boss(X) + + # pyd = GraphUtils.to_pydot(G) + # tmp_png = pyd.create_png(f="png") + # fp = io.BytesIO(tmp_png) + # img = mpimg.imread(fp, format='png') + # plt.axis('off') + # plt.imshow(img) + # plt.show() + + AdjC = AdjacencyConfusion(G0, G) + stats[0].append(AdjC.get_adj_precision()) + stats[1].append(AdjC.get_adj_recall()) + + ArrC = ArrowConfusion(G0, G) + stats[2].append(ArrC.get_arrows_precision()) + stats[3].append(ArrC.get_arrows_recall()) + + print( + [ + ["AP", "AR", "AHP", "AHR"][i] + + ": " + + str(round(stats[i][-1], 2)) + for i in range(4) + ] + ) + + print( + "\nOverall Stats: \n", + [ + ["AP", "AR", "AHP", "AHR"][i] + + ": " + + str(round(np.sum(stats[i]) / reps, 2)) + for i in range(4) + ], + ) + + print("finish") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index de44ae1e..1a8e0638 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -11,63 +11,50 @@ import matplotlib.image as mpimg import matplotlib.pyplot as plt import io +import random -def random_dag(p, d): +def simulate_data(p, d, n): """ - Randomly generates an Erdos-Renyi direct acyclic graph given an ordering. + Randomly generates an Erdos-Renyi direct acyclic graph given an ordering + and randomly simulates data with the provided parameters. p = |variables| d = |edges| / |possible edges| + n = sample size """ # npe = |possible edges| pe = int(p * (p - 1) / 2) - # e = |edges| + # ne = |edges| ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne, "uint8"), np.ones(ne, "uint8")) + e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) np.random.shuffle(e) - - # generate graph - g = np.zeros([p, p], "uint8") - g.T[np.triu_indices(p, 1)] = e - - return g - - -def parameterize_dag(g): - """ - Randomly parameterize a directed acyclic graph. - - g = directed acyclic graph (adjacency matrix) - """ - - # p = |variables| - p = g.shape[0] - - # e = |edges| - e = np.sum(g) + B = np.zeros([p, p]) + B.T[np.triu_indices(p, 1)] = e # generate variance terms - o = np.diag(np.ones(p)) + O = np.diag(np.ones(p)) - # generate edge weights (edge parameters uniformly sampled [-1.0, 1.0]) - b = np.zeros([p, p]) - b[np.where(g == 1)] = np.random.uniform(-1, 1, e) + # simulate data + X = np.random.multivariate_normal(np.zeros(p), O, n) + for i in range(p): + J = np.where(B[i])[0] + for j in J: X[:, i] += B[i, j] * X[:, j] - # calculate covariance - s = np.dot(np.dot(np.linalg.inv(np.eye(p) - b), o), np.linalg.inv(np.eye(p) - b).T) + pi = [i for i in range(p)] + np.random.shuffle(pi) - return s + return (B != 0)[pi][:, pi], X[:, pi] class TestGRaSP(unittest.TestCase): def test_grasp(self): - # ps = [30, 60] - ps = [30] + ps = [30, 60] + # ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 @@ -76,15 +63,13 @@ def test_grasp(self): for d in ds: stats = [[], [], [], []] for rep in range(1, reps + 1): - g0 = random_dag(p, d) + g0, X = simulate_data(p, d, n) print( "\nNodes:", p, "| Edges:", np.sum(g0), "| Avg Degree:", 2 * np.sum(g0) / p, "| Rep:", rep, ) - cov = parameterize_dag(g0) - X = np.random.multivariate_normal(np.zeros(p), cov, n) node_names = [("X%d" % (i + 1)) for i in range(p)] nodes = [] @@ -100,7 +85,7 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X) + G = grasp(X, depth=1) # pyd = GraphUtils.to_pydot(G) # tmp_png = pyd.create_png(f="png") From 58419ec0fa8b06b042ac0146a4ab97be51697b73 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Wed, 17 Jul 2024 09:10:21 -0500 Subject: [PATCH 5/7] algorithms are working by efficiency is lackluster --- causallearn/search/PermutationBased/BOSS.py | 8 ++++---- causallearn/search/PermutationBased/GRaSP.py | 10 +++++----- causallearn/search/PermutationBased/gst.py | 12 ++++++------ tests/TestBOSS.py | 6 +++--- tests/TestGRaSP.py | 7 +++---- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py index b72d8f44..c77a6620 100644 --- a/causallearn/search/PermutationBased/BOSS.py +++ b/causallearn/search/PermutationBased/BOSS.py @@ -100,16 +100,16 @@ def boss( elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) elif score_func == "local_score_BIC_from_cov": # SEM BIC score - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 09416b94..62bd7632 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -92,7 +92,7 @@ def grasp( ---------- X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', - 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')). + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). depth : allowed maximum depth for DFS parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation @@ -158,16 +158,16 @@ def grasp( elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) elif score_func == "local_score_BIC_from_cov": # SEM BIC score - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py index 25e0da6b..70d07903 100644 --- a/causallearn/search/PermutationBased/gst.py +++ b/causallearn/search/PermutationBased/gst.py @@ -1,8 +1,8 @@ class GSTNode: def __init__(self, tree, add=None, score=None): - if score is None: score = -tree.score.score_nocache(tree.vertex, []) - # if score is None: score = -tree.score.score(tree.vertex, []) + # if score is None: score = -tree.score.score_nocache(tree.vertex, []) + if score is None: score = -tree.score.score(tree.vertex, []) self.tree = tree self.add = add self.grow_score = score @@ -17,8 +17,8 @@ def grow(self, available, parents): self.branches = [] for add in available: parents.append(add) - score = -self.tree.score.score_nocache(self.tree.vertex, parents) - # score = -self.tree.score.score(self.tree.vertex, parents) + # score = -self.tree.score.score_nocache(self.tree.vertex, parents) + score = -self.tree.score.score(self.tree.vertex, parents) parents.remove(add) branch = GSTNode(self.tree, add, score) if score > self.grow_score: self.branches.append(branch) @@ -30,8 +30,8 @@ def shrink(self, parents): best = None for remove in [parent for parent in parents]: parents.remove(remove) - score = -self.tree.score.score_nocache(self.tree.vertex, parents) - # score = -self.tree.score.score(self.tree.vertex, parents) + # score = -self.tree.score.score_nocache(self.tree.vertex, parents) + score = -self.tree.score.score(self.tree.vertex, parents) parents.append(remove) if score > self.shrink_score: self.shrink_score = score diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py index 1d87d235..f67fa5a3 100644 --- a/tests/TestBOSS.py +++ b/tests/TestBOSS.py @@ -30,7 +30,7 @@ def simulate_data(p, d, n): ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) B = np.zeros([p, p]) B.T[np.triu_indices(p, 1)] = e @@ -66,7 +66,7 @@ def test_boss(self): print( "\nNodes:", p, "| Edges:", np.sum(g0), - "| Avg Degree:", 2 * np.sum(g0) / p, + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), "| Rep:", rep, ) @@ -84,7 +84,7 @@ def test_boss(self): G0 = dag2cpdag(G0) - G = boss(X) + G = boss(X, parameters={'lambda_value': 2}) # pyd = GraphUtils.to_pydot(G) # tmp_png = pyd.create_png(f="png") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 1a8e0638..0ac1d81d 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -11,7 +11,6 @@ import matplotlib.image as mpimg import matplotlib.pyplot as plt import io -import random def simulate_data(p, d, n): @@ -31,7 +30,7 @@ def simulate_data(p, d, n): ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) B = np.zeros([p, p]) B.T[np.triu_indices(p, 1)] = e @@ -67,7 +66,7 @@ def test_grasp(self): print( "\nNodes:", p, "| Edges:", np.sum(g0), - "| Avg Degree:", 2 * np.sum(g0) / p, + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), "| Rep:", rep, ) @@ -85,7 +84,7 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X, depth=1) + G = grasp(X, depth=1, parameters={'lambda_value': 2}) # pyd = GraphUtils.to_pydot(G) # tmp_png = pyd.create_png(f="png") From 9ea1feddf57954d39f22e79fc57161628d8d682b Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Thu, 18 Jul 2024 13:01:03 -0500 Subject: [PATCH 6/7] final updates to BOSS and GRaSP --- causallearn/search/PermutationBased/BOSS.py | 11 +- causallearn/search/PermutationBased/GRaSP.py | 103 ++----------------- causallearn/search/PermutationBased/gst.py | 12 +-- tests/TestBOSS.py | 21 ++-- tests/TestGRaSP.py | 21 ++-- 5 files changed, 31 insertions(+), 137 deletions(-) diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py index c77a6620..465d3659 100644 --- a/causallearn/search/PermutationBased/BOSS.py +++ b/causallearn/search/PermutationBased/BOSS.py @@ -48,11 +48,11 @@ def boss( G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. """ + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) if score_func == "local_score_CV_general": # % k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: @@ -63,14 +63,12 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif score_func == "local_score_marginal_general": # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif score_func == "local_score_CV_multi": # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -85,7 +83,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif score_func == "local_score_marginal_multi": # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -96,7 +93,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") @@ -105,7 +101,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # SEM BIC score if parameters is None: @@ -113,17 +108,15 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") + score = localScoreClass - gsts = [GST(i, score) for i in range(p)] node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 62bd7632..f4d4aa14 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -106,11 +106,11 @@ def grasp( G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. """ + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) if score_func == "local_score_CV_general": # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: @@ -121,14 +121,12 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif score_func == "local_score_marginal_general": # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif score_func == "local_score_CV_multi": # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -143,7 +141,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif score_func == "local_score_marginal_multi": # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -154,7 +151,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") @@ -163,7 +159,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # SEM BIC score if parameters is None: @@ -171,17 +166,15 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") - score = localScoreClass + score = localScoreClass gsts = [GST(i, score) for i in range(p)] node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names @@ -201,15 +194,11 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] - # local_score = gsts[y].trace(candidates) - - grow(y, y_parents, candidates, score) - local_score = shrink(y, y_parents, score) - + local_score = gsts[y].trace(candidates, y_parents) order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score, gsts): + while dfs(depth - 1, set(), [], order, gsts): if verbose: sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) sys.stdout.flush() @@ -230,7 +219,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): +def dfs(depth: int, flipped: set, history: List[set], order, gsts): cache = [{}, {}, {}, 0] @@ -258,7 +247,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score, gsts) + edge_bump, score_bump = update(i, j, order, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -277,7 +266,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts): + if depth > 0 and dfs(depth - 1, flipped, history, order, gsts): return True del history[-1] @@ -292,7 +281,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score, gsts): +def update(i: int, j: int, order, gsts): edge_bump = 0 old_score = 0 @@ -307,19 +296,7 @@ def update(i: int, j: int, order, score, gsts): z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - - # for w in [w for w in z_parents if w not in candidates]: - # z_parents.remove(w) - # shrink(z, z_parents, score) - - # for w in z_parents: - # candidates.remove(w) - - # local_score = gsts[z].trace(candidates, z_parents) - - grow(z, z_parents, candidates, score) - local_score = shrink(z, z_parents, score) - + local_score = gsts[z].trace(candidates, z_parents) order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -328,68 +305,6 @@ def update(i: int, j: int, order, score, gsts): return edge_bump, new_score - old_score -# grow of grow-shrink -def grow(y: int, y_parents: List[int], candidates: List[int], score): - - best = -score.score(y, y_parents) - # best = -score.score_nocache(y, y_parents) - - add = None - checked = [] - while add is not None or len(candidates) > 0: - - if add is not None: - checked.remove(add) - y_parents.append(add) - candidates = checked - checked = [] - add = None - - while len(candidates) > 0: - - x = candidates.pop() - y_parents.append(x) - current = -score.score(y, y_parents) - # current = -score.score_nocache(y, y_parents) - y_parents.remove(x) - checked.append(x) - - if current > best: - best = current - add = x - - return best - - -# shrink of grow-shrink -def shrink(y: int, y_parents: List[int], score): - - best = -score.score(y, y_parents) - # best = -score.score_nocache(y, y_parents) - - remove = None - checked = 0 - while remove is not None or checked < len(y_parents): - - if remove is not None: - y_parents.remove(remove) - checked = 0 - remove = None - - while checked < len(y_parents): - x = y_parents.pop(0) - current = -score.score(y, y_parents) - # current = -score.score_nocache(y, y_parents) - y_parents.append(x) - checked += 1 - - if current > best: - best = current - remove = x - - return best - - # tucks the node at position i into position j def tuck(i: int, j: int, order): diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py index 70d07903..25e0da6b 100644 --- a/causallearn/search/PermutationBased/gst.py +++ b/causallearn/search/PermutationBased/gst.py @@ -1,8 +1,8 @@ class GSTNode: def __init__(self, tree, add=None, score=None): - # if score is None: score = -tree.score.score_nocache(tree.vertex, []) - if score is None: score = -tree.score.score(tree.vertex, []) + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) self.tree = tree self.add = add self.grow_score = score @@ -17,8 +17,8 @@ def grow(self, available, parents): self.branches = [] for add in available: parents.append(add) - # score = -self.tree.score.score_nocache(self.tree.vertex, parents) - score = -self.tree.score.score(self.tree.vertex, parents) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) parents.remove(add) branch = GSTNode(self.tree, add, score) if score > self.grow_score: self.branches.append(branch) @@ -30,8 +30,8 @@ def shrink(self, parents): best = None for remove in [parent for parent in parents]: parents.remove(remove) - # score = -self.tree.score.score_nocache(self.tree.vertex, parents) - score = -self.tree.score.score(self.tree.vertex, parents) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) parents.append(remove) if score > self.shrink_score: self.shrink_score = score diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py index f67fa5a3..ea963d47 100644 --- a/tests/TestBOSS.py +++ b/tests/TestBOSS.py @@ -7,10 +7,8 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.BOSS import boss from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io + +import gc def simulate_data(p, d, n): @@ -53,11 +51,13 @@ def simulate_data(p, d, n): class TestBOSS(unittest.TestCase): def test_boss(self): ps = [30, 60] - # ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] @@ -84,15 +84,8 @@ def test_boss(self): G0 = dag2cpdag(G0) - G = boss(X, parameters={'lambda_value': 2}) - - # pyd = GraphUtils.to_pydot(G) - # tmp_png = pyd.create_png(f="png") - # fp = io.BytesIO(tmp_png) - # img = mpimg.imread(fp, format='png') - # plt.axis('off') - # plt.imshow(img) - # plt.show() + G = boss(X, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 0ac1d81d..f1fe0dd0 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -7,10 +7,8 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.GRaSP import grasp from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io + +import gc def simulate_data(p, d, n): @@ -53,11 +51,13 @@ def simulate_data(p, d, n): class TestGRaSP(unittest.TestCase): def test_grasp(self): ps = [30, 60] - # ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] @@ -84,15 +84,8 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X, depth=1, parameters={'lambda_value': 2}) - - # pyd = GraphUtils.to_pydot(G) - # tmp_png = pyd.create_png(f="png") - # fp = io.BytesIO(tmp_png) - # img = mpimg.imread(fp, format='png') - # plt.axis('off') - # plt.imshow(img) - # plt.show() + G = grasp(X, depth=1, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) From 53995155e4be2ee400ad6594630cdd7c08ea2ed1 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Thu, 18 Jul 2024 13:01:54 -0500 Subject: [PATCH 7/7] final updates --- causallearn/score/LocalScoreFunction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py index e02ba737..a4af5361 100644 --- a/causallearn/score/LocalScoreFunction.py +++ b/causallearn/score/LocalScoreFunction.py @@ -34,9 +34,9 @@ def local_score_BIC(Data: ndarray, i: int, PAi: List[int], parameters=None) -> f if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -68,9 +68,9 @@ def local_score_BIC_from_cov( if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value