From 8dcec8ef23571a1c293bab157f6bd9fa3861a98e Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Fri, 5 Jul 2024 13:21:41 -0500 Subject: [PATCH 01/20] Add new func --- thicket/thicket.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 8d0e2a33..0fadd30a 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -15,7 +15,11 @@ import pandas as pd import numpy as np -from hatchet import GraphFrame +from hatchet import ( + frame, + GraphFrame, + node, +) from hatchet.graph import Graph from hatchet.query import QueryEngine from thicket.query import ( @@ -1514,6 +1518,35 @@ def get_unique_metadata(self): return sorted_meta + + def add_root_node(self, attrs): + """Add node at root level""" + assert self.graph is self.statsframe.graph + + new_node = node.Node( + frame_obj=frame.Frame( + attrs=attrs + ), + hnid=len(self.graph) + ) + + # graph and statsframe.graph + self.graph.roots.append(new_node) + + # dataframe + idx_levels = self.dataframe.index.names + new_idx = [[new_node]] + [self.profile] + new_node_df = pd.DataFrame( + index=pd.MultiIndex.from_product(new_idx, names=idx_levels) + ) + self.dataframe = pd.concat([self.dataframe, new_node_df]) + + # statsframe.dataframe + self.statsframe.dataframe = helpers._new_statsframe_df(self.dataframe) + + assert self.graph is self.statsframe.graph + + def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and profile mapping objects based on the component's index or a list of profiles. 
From 77e8f7a62bf73b9916925148167c87954e189fa2 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Sun, 7 Jul 2024 22:34:07 -0500 Subject: [PATCH 02/20] Add name col to df --- thicket/thicket.py | 1 + 1 file changed, 1 insertion(+) diff --git a/thicket/thicket.py b/thicket/thicket.py index 0fadd30a..3185f0b4 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1539,6 +1539,7 @@ def add_root_node(self, attrs): new_node_df = pd.DataFrame( index=pd.MultiIndex.from_product(new_idx, names=idx_levels) ) + new_node_df["name"] = attrs["name"] self.dataframe = pd.concat([self.dataframe, new_node_df]) # statsframe.dataframe From 8dd8f13e2b77010d7a2ef053f276a41bd1dde5b1 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Tue, 9 Jul 2024 13:46:54 -0500 Subject: [PATCH 03/20] Add get_node function --- thicket/thicket.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 3185f0b4..630e997c 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1518,17 +1518,11 @@ def get_unique_metadata(self): return sorted_meta - def add_root_node(self, attrs): """Add node at root level""" assert self.graph is self.statsframe.graph - new_node = node.Node( - frame_obj=frame.Frame( - attrs=attrs - ), - hnid=len(self.graph) - ) + new_node = node.Node(frame_obj=frame.Frame(attrs=attrs), hnid=len(self.graph)) # graph and statsframe.graph self.graph.roots.append(new_node) @@ -1547,6 +1541,14 @@ def add_root_node(self, attrs): assert self.graph is self.statsframe.graph + def get_node(self, name): + node = [n for n in self.graph.traverse() if n.frame["name"] == name] + + if len(node) > 1: + warnings.warn(f'More than one node with name "{name}". 
Returning a list') + return node + + return node.pop() def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and From 6949c721c9902ed97c8bac7cbe52639e8ade7b1a Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Tue, 9 Jul 2024 14:44:26 -0500 Subject: [PATCH 04/20] Add docstring --- thicket/thicket.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 630e997c..281bd933 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1519,8 +1519,12 @@ def get_unique_metadata(self): return sorted_meta def add_root_node(self, attrs): - """Add node at root level""" - assert self.graph is self.statsframe.graph + """Add node at root level with given attributes. + + Arguments: + attrs (dict): attributes for the new node which will be used to initialize the + node.frame. + """ new_node = node.Node(frame_obj=frame.Frame(attrs=attrs), hnid=len(self.graph)) @@ -1539,9 +1543,16 @@ def add_root_node(self, attrs): # statsframe.dataframe self.statsframe.dataframe = helpers._new_statsframe_df(self.dataframe) - assert self.graph is self.statsframe.graph - def get_node(self, name): + """Get a node object in the Thicket by its node.frame['name']. If more than one + node has the same name, a list of nodes is returned. + + Arguments: + name (str): name of the node (node.frame['name']). 
+ + Returns: + (node): Hatchet Node object + """ node = [n for n in self.graph.traverse() if n.frame["name"] == name] if len(node) > 1: From 2223c8e7624d060fe986a736e7e1c2f3e3ea3e52 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Tue, 9 Jul 2024 15:06:10 -0500 Subject: [PATCH 05/20] Avoid F402 --- thicket/thicket.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 281bd933..570a1e08 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -15,10 +15,10 @@ import pandas as pd import numpy as np +import hatchet.node from hatchet import ( frame, GraphFrame, - node, ) from hatchet.graph import Graph from hatchet.query import QueryEngine @@ -1526,7 +1526,9 @@ def add_root_node(self, attrs): node.frame. """ - new_node = node.Node(frame_obj=frame.Frame(attrs=attrs), hnid=len(self.graph)) + new_node = hatchet.node.Node( + frame_obj=frame.Frame(attrs=attrs), hnid=len(self.graph) + ) # graph and statsframe.graph self.graph.roots.append(new_node) From b969ccfd3f0e00bf3edefcdad530a0e9b5160407 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Tue, 9 Jul 2024 21:07:52 -0500 Subject: [PATCH 06/20] call stats reapply func --- thicket/thicket.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/thicket/thicket.py b/thicket/thicket.py index 570a1e08..b322e90a 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1544,6 +1544,8 @@ def add_root_node(self, attrs): # statsframe.dataframe self.statsframe.dataframe = helpers._new_statsframe_df(self.dataframe) + # Reapply stats operations after clearing statsframe dataframe + self.reapply_stats_operations() def get_node(self, name): """Get a node object in the Thicket by its node.frame['name']. 
If more than one From c9db841dfab38553e0f9146c4b1fcc77282cc918 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:13:41 -0500 Subject: [PATCH 07/20] Change pop --- thicket/thicket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index b322e90a..ff9f996f 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1563,7 +1563,7 @@ def get_node(self, name): warnings.warn(f'More than one node with name "{name}". Returning a list') return node - return node.pop() + return node[0] def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and From 3b4c8f5e3e2660033f16d74abe87f45c6a8641f7 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:22:18 -0500 Subject: [PATCH 08/20] Change imports to be consistent --- thicket/thicket.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index ff9f996f..eea4c503 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -15,12 +15,10 @@ import pandas as pd import numpy as np -import hatchet.node -from hatchet import ( - frame, - GraphFrame, -) +from hatchet import GraphFrame +from hatchet.frame import Frame from hatchet.graph import Graph +from hatchet.node import Node from hatchet.query import QueryEngine from thicket.query import ( Query, @@ -1526,8 +1524,8 @@ def add_root_node(self, attrs): node.frame. 
""" - new_node = hatchet.node.Node( - frame_obj=frame.Frame(attrs=attrs), hnid=len(self.graph) + new_node = Node( + frame_obj=Frame(attrs=attrs), hnid=len(self.graph) ) # graph and statsframe.graph From c38e7c8fb3951281697b0f5bd797d6f458bc053d Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:22:33 -0500 Subject: [PATCH 09/20] Fix grammar --- thicket/thicket.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index eea4c503..c7fe72f4 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1546,11 +1546,11 @@ def add_root_node(self, attrs): self.reapply_stats_operations() def get_node(self, name): - """Get a node object in the Thicket by its node.frame['name']. If more than one + """Get a node object in the Thicket by its Node.frame['name']. If more than one node has the same name, a list of nodes is returned. Arguments: - name (str): name of the node (node.frame['name']). + name (str): name of the node (Node.frame['name']). Returns: (node): Hatchet Node object From 2886c460efd7ad2d2067d1fff7c6959e1f2d3b84 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:31:33 -0500 Subject: [PATCH 10/20] Raise error if node not found --- thicket/thicket.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/thicket/thicket.py b/thicket/thicket.py index c7fe72f4..eb10e45d 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1560,6 +1560,8 @@ def get_node(self, name): if len(node) > 1: warnings.warn(f'More than one node with name "{name}". 
Returning a list') return node + elif len(node) == 0: + raise ValueError(f'Node with name "{name}" not found.') return node[0] From 13b69e76234bf1a115f1e203bf94a120c3d7b49d Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:31:59 -0500 Subject: [PATCH 11/20] Add unit tests --- thicket/tests/test_add_root_node.py | 26 ++++++++++++++++++++++++++ thicket/tests/test_get_node.py | 20 ++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 thicket/tests/test_add_root_node.py create mode 100644 thicket/tests/test_get_node.py diff --git a/thicket/tests/test_add_root_node.py b/thicket/tests/test_add_root_node.py new file mode 100644 index 00000000..30b270c6 --- /dev/null +++ b/thicket/tests/test_add_root_node.py @@ -0,0 +1,26 @@ +# Copyright 2022 Lawrence Livermore National Security, LLC and other +# Thicket Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +from hatchet.node import Node + + +def test_add_root_node(literal_thickets): + tk, tk2, tk3 = literal_thickets + + assert len(tk.graph) == 4 + + tk.add_root_node({"name": "Test", "type": "function"}) + + test_node = tk.get_node("Test") + + # Check if node was inserted in all components + assert isinstance(test_node, Node) + assert len(tk.graph) == 5 + assert len(tk.statsframe.graph) == 5 + assert test_node in tk.dataframe.index.get_level_values("node") + assert test_node in tk.statsframe.dataframe.index.get_level_values("node") + + assert tk.dataframe.loc[test_node, "name"].values[0] == "Test" + assert tk.statsframe.dataframe.loc[test_node, "name"] == "Test" diff --git a/thicket/tests/test_get_node.py b/thicket/tests/test_get_node.py new file mode 100644 index 00000000..ac9b872f --- /dev/null +++ b/thicket/tests/test_get_node.py @@ -0,0 +1,20 @@ +# Copyright 2022 Lawrence Livermore National Security, LLC and other +# Thicket Project Developers. See the top-level LICENSE file for details. 
+# +# SPDX-License-Identifier: MIT + +import pytest + + +def test_get_node(literal_thickets): + tk, tk2, tk3 = literal_thickets + + with pytest.raises(ValueError): + foo = tk.get_node("Foo") + + baz = tk.get_node("Baz") + + # Check node properties + assert baz.frame["name"] == "Baz" + assert baz.frame["type"] == "function" + assert baz._hatchet_nid == 0 From c38a78fc79a0344fb7b6f53f51ffefb6c2253abf Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:43:06 -0500 Subject: [PATCH 12/20] Set depth and hnid automatically --- thicket/thicket.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index eb10e45d..41c53c14 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1525,12 +1525,17 @@ def add_root_node(self, attrs): """ new_node = Node( - frame_obj=Frame(attrs=attrs), hnid=len(self.graph) + frame_obj=Frame(attrs=attrs) ) # graph and statsframe.graph self.graph.roots.append(new_node) + # Set depth + self.graph.enumerate_depth() + # Set hatchet nid + self.graph.enumerate_traverse() + # dataframe idx_levels = self.dataframe.index.names new_idx = [[new_node]] + [self.profile] From e2035f77113a1c44b306b73e89fed673b2a1d6df Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:43:21 -0500 Subject: [PATCH 13/20] Add validation check at the end --- thicket/thicket.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/thicket/thicket.py b/thicket/thicket.py index 41c53c14..47f06752 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1550,6 +1550,9 @@ def add_root_node(self, attrs): # Reapply stats operations after clearing statsframe dataframe self.reapply_stats_operations() + # Check Thicket state + validate_nodes(self) + def get_node(self, name): """Get a node object in the Thicket by its Node.frame['name']. If more than one node has the same name, a list of nodes is returned. 
From cb5d2f858c203ba524971cb0e5668abd2392ff9e Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:44:04 -0500 Subject: [PATCH 14/20] update unit test --- thicket/tests/test_add_root_node.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/thicket/tests/test_add_root_node.py b/thicket/tests/test_add_root_node.py index 30b270c6..5dd4760d 100644 --- a/thicket/tests/test_add_root_node.py +++ b/thicket/tests/test_add_root_node.py @@ -11,12 +11,15 @@ def test_add_root_node(literal_thickets): assert len(tk.graph) == 4 + # Call add_root_node tk.add_root_node({"name": "Test", "type": "function"}) - + # Get node variable test_node = tk.get_node("Test") # Check if node was inserted in all components assert isinstance(test_node, Node) + assert test_node._hatchet_nid == 3 + assert test_node._depth == 0 assert len(tk.graph) == 5 assert len(tk.statsframe.graph) == 5 assert test_node in tk.dataframe.index.get_level_values("node") From ee996a9af6211435544a735b5a7532ec426af3f5 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:44:25 -0500 Subject: [PATCH 15/20] Black --- thicket/thicket.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 47f06752..6b940954 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1524,9 +1524,7 @@ def add_root_node(self, attrs): node.frame. 
""" - new_node = Node( - frame_obj=Frame(attrs=attrs) - ) + new_node = Node(frame_obj=Frame(attrs=attrs)) # graph and statsframe.graph self.graph.roots.append(new_node) From 9e2438d5b213dffce5978fe6b9dfd3a943ad85b4 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:47:31 -0500 Subject: [PATCH 16/20] enumerate_traverse calls enumerate_depth --- thicket/thicket.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 6b940954..c55b1541 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1529,9 +1529,7 @@ def add_root_node(self, attrs): # graph and statsframe.graph self.graph.roots.append(new_node) - # Set depth - self.graph.enumerate_depth() - # Set hatchet nid + # Set hatchet nid and depth self.graph.enumerate_traverse() # dataframe From 7294aac49d8dc7376b88e61b1d04d5720afa65df Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jul 2024 14:50:25 -0500 Subject: [PATCH 17/20] Fix flake check --- thicket/tests/test_get_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/tests/test_get_node.py b/thicket/tests/test_get_node.py index ac9b872f..c7cbcf5c 100644 --- a/thicket/tests/test_get_node.py +++ b/thicket/tests/test_get_node.py @@ -10,7 +10,7 @@ def test_get_node(literal_thickets): tk, tk2, tk3 = literal_thickets with pytest.raises(ValueError): - foo = tk.get_node("Foo") + tk.get_node("Foo") baz = tk.get_node("Baz") From 53f87188195e54fc791b9368ada1edbb42852879 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 22 Jul 2024 16:48:15 -0500 Subject: [PATCH 18/20] Properly mark unused arguments --- thicket/tests/test_add_root_node.py | 2 +- thicket/tests/test_get_node.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/tests/test_add_root_node.py b/thicket/tests/test_add_root_node.py index 5dd4760d..e5b81747 100644 --- a/thicket/tests/test_add_root_node.py +++ b/thicket/tests/test_add_root_node.py @@ -7,7 
+7,7 @@ def test_add_root_node(literal_thickets): - tk, tk2, tk3 = literal_thickets + tk, _, _ = literal_thickets assert len(tk.graph) == 4 diff --git a/thicket/tests/test_get_node.py b/thicket/tests/test_get_node.py index c7cbcf5c..33baacf8 100644 --- a/thicket/tests/test_get_node.py +++ b/thicket/tests/test_get_node.py @@ -7,7 +7,7 @@ def test_get_node(literal_thickets): - tk, tk2, tk3 = literal_thickets + tk, _, _ = literal_thickets with pytest.raises(ValueError): tk.get_node("Foo") From 5053f832f9f1280cf0d7cdcbd4fb116e7dc25cd9 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 22 Jul 2024 16:56:19 -0500 Subject: [PATCH 19/20] Change ValueError to KeyError --- thicket/tests/test_get_node.py | 2 +- thicket/thicket.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/tests/test_get_node.py b/thicket/tests/test_get_node.py index 33baacf8..06b0075a 100644 --- a/thicket/tests/test_get_node.py +++ b/thicket/tests/test_get_node.py @@ -9,7 +9,7 @@ def test_get_node(literal_thickets): tk, _, _ = literal_thickets - with pytest.raises(ValueError): + with pytest.raises(KeyError): tk.get_node("Foo") baz = tk.get_node("Baz") diff --git a/thicket/thicket.py b/thicket/thicket.py index c55b1541..a2307205 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1565,7 +1565,7 @@ def get_node(self, name): warnings.warn(f'More than one node with name "{name}". 
Returning a list') return node elif len(node) == 0: - raise ValueError(f'Node with name "{name}" not found.') + raise KeyError(f'Node with name "{name}" not found.') return node[0] From fcc266a7db12f0eff3f04f20e3cdbe419dc5aa3a Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 22 Jul 2024 17:01:43 -0500 Subject: [PATCH 20/20] Remove warning, add to docstring --- thicket/thicket.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index a2307205..afb30e9c 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1557,17 +1557,15 @@ def get_node(self, name): name (str): name of the node (Node.frame['name']). Returns: - (node): Hatchet Node object + (Node or list(Node)): Node object with the given name or list of Node objects + with the given name. """ node = [n for n in self.graph.traverse() if n.frame["name"] == name] - if len(node) > 1: - warnings.warn(f'More than one node with name "{name}". Returning a list') - return node - elif len(node) == 0: + if len(node) == 0: raise KeyError(f'Node with name "{name}" not found.') - return node[0] + return node[0] if len(node) == 1 else node def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and