From b6cc24e9c0bbcfe4b5c6f5979b9a051fa658e5fc Mon Sep 17 00:00:00 2001 From: Elahe-ek Date: Sun, 12 Nov 2023 22:49:15 -0500 Subject: [PATCH 1/2] Update docstrings --- tgx/utils/stat.py | 313 +++++++++++++++++++++++----------------------- 1 file changed, 156 insertions(+), 157 deletions(-) diff --git a/tgx/utils/stat.py b/tgx/utils/stat.py index fbaf2a7..616f678 100644 --- a/tgx/utils/stat.py +++ b/tgx/utils/stat.py @@ -2,138 +2,164 @@ import networkx as nx import numpy as np from tgx.utils.graph_utils import train_test_split +from typing import List, Dict -__all__ = ["degree_over_time", - "nodes_over_time", - "edges_over_time", - "nodes_and_edges_over_time", +__all__ = ["average_degree_per_ts", + "nodes_per_ts", + "edges_per_ts", + "nodes_and_edges_per_ts", "get_avg_e_per_ts", "get_avg_degree", "get_num_timestamps", "get_num_unique_edges", "get_reoccurrence", "get_surprise", - "get_novelty", "get_avg_node_activity", "get_avg_node_engagement", "degree_density"] -def degree_over_time(graph: object, - network_name: str, - filepath: str = ".") -> None: +def average_degree_per_ts(graph: list, + total_nodes: int, + network_name: str = None, + plot_path: str = None) -> None: r''' Plot average degree per timestamp. Parameters: - graph: Graph object created by tgx.Graph containing edgelist - total_nodes: number of nodes that appear through all the snapshots - network_name: name of the graph to be used in the output file name - filepath: path to save the output figure + graph: a list containing graph snapshots + total_nodes: number of nodes that appear through all the snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure ''' print("Plotting average degree per timestamp") - ave_degree = _calculate_average_degree_per_ts(graph) - + ave_degree = _calculate_average_degree_per_ts(graph, total_nodes) if network_name is not None: filename = f"{network_name}_ave_degree_per_ts" else: filename = "ave_degree_per_ts" - plot_for_snapshots(ave_degree, filename, "Average degree", plot_path = filepath) - + plot_for_snapshots(ave_degree, filename, "Average degree", plot_path = plot_path) print("Plotting Done!") return - -def nodes_over_time(graph: object, - network_name: str, - filepath: str = ".") -> None: - +def nodes_per_ts(graph: list, + network_name: str = None, + plot_path: str = None) -> None: r''' Plot number of active nodes per timestamp. Parameters: - graph: Graph object created by tgx.Graph containing edgelist - network_name: name of the graph to be used in the output file name - filepath: path to save the output figure + graph: a list containing graph snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure ''' - print("Plotting number of nodes per timestamp.") + print("Plotting number of nodes per timestamp") active_nodes = _calculate_node_per_ts(graph) if network_name is not None: filename = f"{network_name}_nodes_per_ts" else: filename = "nodes_per_ts" - plot_for_snapshots(active_nodes, filename, "Number of nodes", plot_path = filepath) - # print("Plotting Done!") + plot_for_snapshots(active_nodes, filename, "Number of nodes", plot_path = plot_path) + print("Plotting Done!") return -def edges_over_time(graph: object, +def edges_per_ts(graph: list, plot_path: str = None, - network_name: str = None, - filepath: str = ".") -> None: + network_name: str = None) -> None: r''' Plot number of edges per timestamp. Parameters: - graph: Graph object created by tgx.Graph containing edgelist - network_name: name of the graph to be used in the output file name - filepath: path to save the output figure + graph: a list containing graph snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure ''' - print("Plotting number of edges per timestamp.") + print("Plotting number of edges per timestamp") active_edges = _calculate_edge_per_ts(graph) if network_name is not None: filename = f"{network_name}_edges_per_ts" else: filename = "_edges_per_ts" - plot_for_snapshots(active_edges, plot_path, filename, "Number of edges", plot_path = filepath) - # print("Plotting Done!") + plot_for_snapshots(active_edges, plot_path, filename, "Number of edges") + print("Plotting Done!") return -def nodes_and_edges_over_time(graph: object, +def nodes_and_edges_per_ts(graph: list, network_name: str , - filepath: str = "."): + plot_path: str = None): r""" Plot number of nodes per timestamp and number of edges per timestamp in one fiugre. Parameters: - graph: Graph object created by tgx.Graph containing edgelist - network_name: name of the graph to be used in the output file name - filepath: path to save the output figure + graph: a list containing graph snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure """ - print("Plotting number of nodes and edges per timestamp.") edges = _calculate_edge_per_ts(graph) nodes = _calculate_node_per_ts(graph) - ts = list(range(0, len(graph.data))) + ts = list(range(0, len(graph))) + return plot_nodes_edges_per_ts(edges, nodes, ts, network_name = network_name, plot_path = plot_path) + - return plot_nodes_edges_per_ts(edges, nodes, ts, network_name = network_name, plot_path = filepath) +def connected_components_per_ts(graph: list, + network_name: str = None, + plot_path: str = None) -> None: + r""" + Plot number of connected components per timestamp + Parameters: + graph: a list containing graph snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure + """ + num_components = [] + for t in range(len(graph)): + parent = list(range(graph[t].number_of_nodes)) + + for _, edge_data in graph[t].edgelist.items(): + for (u, v), _ in edge_data.items(): + _merge(u, v, parent) + + num = 0 + for u in graph[t].nodes(): + if parent[u] == u: + num += 1 + num_components.append(num) + + if network_name is not None: + filename = f"{network_name}_connected_components_per_ts" + else: + filename = "_connected_components_per_ts" + plot_for_snapshots(num_components, filename, "Number of connected components", plot_path = plot_path) + print(num_components) + print("Plotting Done!") + + return - -def _calculate_average_degree_per_ts(graph): - total_nodes = graph.total_nodes() - total_ts = len(graph.data) +def _calculate_average_degree_per_ts(graph, total_nodes): + total_ts = len(graph) ave_degree = [] - for ts in range(total_ts): - num_edges = len(graph.data[ts]) + for t1 in range(total_ts): + num_edges = graph[t1].number_of_edges() ave_degree.append(num_edges*2/ total_nodes) return ave_degree def _calculate_node_per_ts(graph): active_nodes = [] - for ts in range(len(graph.data)): - active_nodes.append(graph.edgelist_node_count(graph.data[ts])) + for ts in range(len(graph)): + active_nodes.append(graph[ts].number_of_nodes()) return active_nodes def _calculate_edge_per_ts(graph): active_edges = [] - for ts in range(len(graph.data)): - active_edges.append(len(graph.data[ts])) + for ts in range(len(graph)): + active_edges.append(graph[ts].number_of_edges()) return active_edges def get_avg_e_per_ts(graph_edgelist: dict) -> float: r""" Calculate the average number of edges per timestamp - Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ sum_num_e_per_ts = 0 unique_ts = list(graph_edgelist.keys()) @@ -149,13 +175,12 @@ def get_avg_e_per_ts(graph_edgelist: dict) -> float: return avg_num_e_per_ts -def get_avg_degree(graph: object) -> float: +def get_avg_degree(graph_edgelist: dict) -> float: r""" Calculate average degree over the timestamps Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ - graph_edgelist = graph.data degree_avg_at_ts_list = [] unique_ts = list(graph_edgelist.keys()) for ts in unique_ts: @@ -175,30 +200,29 @@ def get_num_timestamps(graph_edgelist:dict) -> int: r""" Calculate the number of timestamps Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ print(f"INFO: Number of timestamps: {len(graph_edgelist)}") return len(graph_edgelist) -def get_num_unique_edges(graph: object) -> int: +def get_num_unique_edges(graph_edgelist: dict) -> int: r""" Calculate the number of unique edges Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ - graph_edgelist = graph.data unique_edges = {} for ts, e_list in graph_edgelist.items(): - for e in e_list: + for e, repeat in e_list.items(): if e not in unique_edges: unique_edges[e] = 1 print(f"INFO: Number of unique edges: {len(unique_edges)}") return len(unique_edges) -def _split_data_chronological(graph_edgelist: dict, test_ratio: int): +def _split_data_chronological(graph_edgelist, test_ratio): r""" - split the timestamped edge-list chronologically + Split the timestamped edge-list chronologically """ # split the temporal graph data chronologically unique_ts = np.sort(list(graph_edgelist.keys())) @@ -206,102 +230,75 @@ def _split_data_chronological(graph_edgelist: dict, test_ratio: int): # make train-validation & test splits train_val_e_set, test_e_set = {}, {} + # for ts, e_list in graph_edgelist.items(): + # for (u,v), repeat in e_list.items(): + + # if ts < test_split_time: + # if (u,v) not in train_val_e_set: + # train_val_e_set[(u,v)] = True + # else: + # if (u,v) not in test_e_set: + # test_e_set[(u,v)] = True + for ts, e_list in graph_edgelist.items(): - for (u,v) in e_list: + for (u,v), freq in e_list.items(): if ts < test_split_time: if (u,v) not in train_val_e_set: - train_val_e_set[(u,v)] = 1 + train_val_e_set[(u,v)] = freq else: if (u,v) not in test_e_set: - test_e_set[(u,v)] = 1 + test_e_set[(u,v)] = freq return train_val_e_set, test_e_set -def find(x, parent): +def _find(x, parent): if parent[x] == x: return x - parent[x] = find(parent[x], parent) + parent[x] = _find(parent[x], parent) return parent[x] -def merge(x, y, parent): - root_x = find(x, parent) - root_y = find(y, parent) +def _merge(x, y, parent): + root_x = _find(x, parent) + root_y = _find(y, parent) if root_x != root_y: parent[root_x] = root_y -# def size_connected_components(graph) -> list: -# """ -# Calculate the sizes of connected components per timestamp. - -# Returns: -# component_sizes: A list containing the sizes of connected components in each timestamp. -# """ - -# component_sizes = [] -# for t in range(len(graph)): -# parent = list(range(graph[t].number_of_nodes)) - -# for _, edge_data in graph[t].edgelist.items(): -# for (u, v), _ in edge_data.items(): -# merge(u, v, parent) - -# component_sizes_t = {} -# for u in graph[t].nodes(): -# root = find(u, parent) -# if root not in component_sizes_t: -# component_sizes_t[root] = 0 -# component_sizes_t[root] += 1 - -# component_sizes.append(component_sizes_t) - -# return component_sizes - - -# def num_connected_components_per_ts(graph: list, -# network_name: str = None, -# plot_path: str = None) -> None: -# """ - -# Plot the number of connected components per timestamp. - -# """ - -# num_components = [] -# for t in range(len(graph)): -# parent = list(range(graph[t].number_of_nodes)) +def size_connected_components(graph: list) -> List[Dict]: + r""" + Calculate the sizes of connected components per timestamp + Returns: + list: A list containing the sizes of connected components in each timestamp. + """ + component_sizes = [] + for t in range(len(graph)): + parent = list(range(graph[t].number_of_nodes)) -# for _, edge_data in graph[t].edgelist.items(): -# for (u, v), _ in edge_data.items(): -# merge(u, v, parent) + for _, edge_data in graph[t].edgelist.items(): + for (u, v), _ in edge_data.items(): + _merge(u, v, parent) -# num = 0 -# for u in graph[t].nodes(): -# if parent[u] == u: -# num += 1 -# num_components.append(num) + component_sizes_t = {} + for u in graph[t].nodes(): + root = _find(u, parent) + if root not in component_sizes_t: + component_sizes_t[root] = 0 + component_sizes_t[root] += 1 -# if network_name is not None: -# filename = f"{network_name}_num_connected_components_per_ts" -# else: -# filename = "_num_connected_components_per_ts" -# plot_for_snapshots(num_components, filename, "Number of connected components", plot_path = plot_path) -# print(num_components) -# print("Plotting Done!") + component_sizes.append(component_sizes_t) -# return + return component_sizes -def get_reoccurrence(graph:object, test_ratio: float=0.15) -> float: +def get_reoccurrence(graph_edgelist: dict, test_ratio: float=0.15) -> float: r""" Calculate the recurrence index Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data test_ratio: The ratio to split the data chronologically """ - graph_edgelist = graph.data train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) train_val_size = len(train_val_e_set) # intersect = 0 @@ -323,14 +320,13 @@ def get_reoccurrence(graph:object, test_ratio: float=0.15) -> float: print(f"INFO: Reoccurrence: {reoccurrence}") return reoccurrence -def get_surprise(graph, test_ratio: float = 0.15) -> float: +def get_surprise(graph_edgelist: dict, test_ratio: float = 0.15) -> float: r""" Calculate the surprise index Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data test_ratio: The ratio to split the data chronologically """ - graph_edgelist = graph.data train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) test_size = len(test_e_set) @@ -349,20 +345,20 @@ def get_surprise(graph, test_ratio: float = 0.15) -> float: print(f"INFO: Surprise: {surprise}") return surprise -def get_novelty(graph : object) -> float: +def get_novelty(graph_edgelist: dict) -> float: r""" Calculate the novelty index + $\operatorname{ker} f=\{g\in G:f(g)=e_{H}\}{\mbox{.}}$ Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ - graph_edgelist = graph.data unique_ts = np.sort(list(graph_edgelist.keys())) novelty_ts = [] for ts_idx, ts in enumerate(unique_ts): - e_set_this_ts = set(list(graph_edgelist[ts])) + e_set_this_ts = set(list(graph_edgelist[ts].keys())) e_set_seen = [] for idx in range(0, ts_idx): - e_set_seen.append(list(graph_edgelist[unique_ts[idx]])) + e_set_seen.append(list(graph_edgelist[unique_ts[idx]].keys())) e_set_seen = set(item for sublist in e_set_seen for item in sublist) novelty_ts.append(float(len(e_set_this_ts - e_set_seen) * 1.0 / len(e_set_this_ts))) @@ -371,18 +367,17 @@ def get_novelty(graph : object) -> float: return novelty -def get_avg_node_activity(graph: object) -> float: +def get_avg_node_activity(graph_edgelist: dict) -> float: r""" Calculate the average node activity, - the proportion of time steps a node is present + the proportion of time steps a node is present. Parameters: - graph: Graph object created by tgx.Graph containing edgelist + graph_edgelist: Dictionary containing graph data """ - graph_edgelist = graph.data num_unique_ts = len(graph_edgelist) node_ts = {} for ts, e_list in graph_edgelist.items(): - for e in e_list: + for e, repeat in e_list.items(): # source if e[0] not in node_ts: node_ts[e[0]] = {ts: True} @@ -406,34 +401,38 @@ def get_avg_node_activity(graph: object) -> float: return avg_node_activity -def get_avg_node_engagement(graph: object): +def get_avg_node_engagement(graph_edgelist: dict) -> List[int]: r""" - get the average node engagement over time. - node engagement represents the average number of distinct nodes that establish - at least one new connection during each time step. + Calculate the average node engagement per timestamp, + the average number of distinct nodes that establish + at least one new connection. + Parameters: + graph_edgelist: Dictionary containing graph data """ - graph_edgelist = graph.data engaging_nodes = [] previous_edges = set() for ts, e_list in graph_edgelist.items(): node_set = set() - new_edges = {(u, v) for (u, v) in e_list if frozenset({u, v}) not in previous_edges} + new_edges = {(u, v) for (u, v), _ in e_list.items() if frozenset({u, v}) not in previous_edges} for u, v in new_edges: if u not in node_set: node_set.add(u) if v not in node_set: node_set.add(v) - # engaging_nodes.append((ts, len(node_set))) engaging_nodes.append(len(node_set)) - previous_edges = {frozenset({u, v}) for (u, v) in e_list} # Update the set of previous edges for the next timestamp + previous_edges = {frozenset({u, v}) for (u, v), _ in e_list.items()} # Update the set of previous edges for the next timestamp return engaging_nodes -def degree_density(graph: object, network_name: str = None, k = 10, plot_path: str = None) -> None: +def degree_density(graph_edgelist: dict, k: int = 10, network_name: str = None, plot_path: str = None) -> None: r""" - plot the density map of node degrees over timestamps + Plot density map of node degrees per time window + Parameters: + graph_edgelist: Dictionary containing graph data + k: number of time windows + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure """ - graph_edgelist = graph.data degrees_by_k_list = [] temp = [] temp_idx = 0 From 1f9b87e169afccfe8ec33eed8cd76666d4dd0cb1 Mon Sep 17 00:00:00 2001 From: Elahe-ek Date: Tue, 28 Nov 2023 18:39:15 -0500 Subject: [PATCH 2/2] updated stats --- tgx/utils/newstat.py | 149 +++++++++++++++++++++++++++++++++++++++++++ tgx/utils/stat.py | 145 +---------------------------------------- 2 files changed, 152 insertions(+), 142 deletions(-) create mode 100644 tgx/utils/newstat.py diff --git a/tgx/utils/newstat.py b/tgx/utils/newstat.py new file mode 100644 index 0000000..b9ff7ed --- /dev/null +++ b/tgx/utils/newstat.py @@ -0,0 +1,149 @@ +from tgx.utils.plotting_utils import plot_for_snapshots, plot_nodes_edges_per_ts, plot_density_map +import networkx as nx +import numpy as np +from tgx.utils.graph_utils import train_test_split +from typing import List, Dict + +__all__ = ["connected_components_per_ts", + "size_connected_components", + "get_avg_node_engagement", + "degree_density"] + + +def degree_density(graph_edgelist: dict, k: int = 10, network_name: str = None, plot_path: str = None) -> None: + r""" + Plot density map of node degrees per time window + Parameters: + graph_edgelist: Dictionary containing graph data + k: number of time windows + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure + """ + degrees_by_k_list = [] + temp = [] + temp_idx = 0 + unique_ts = list(graph_edgelist.keys()) + for ts in unique_ts: + e_at_this_ts = graph_edgelist[ts] + G = nx.MultiGraph() + for e, repeat in e_at_this_ts.items(): + G.add_edge(e[0], e[1], weight=repeat) + nodes = G.nodes() + degrees = [G.degree[n] for n in nodes] + + if temp_idx None: + r""" + Plot number of connected components per timestamp + Parameters: + graph: a list containing graph snapshots + network_name: name of the graph to be used in the output file name + plot_path: path to save the output figure + """ + num_components = [] + for t in range(len(graph)): + parent = list(range(graph[t].number_of_nodes)) + + for _, edge_data in graph[t].edgelist.items(): + for (u, v), _ in edge_data.items(): + _merge(u, v, parent) + + num = 0 + for u in graph[t].nodes(): + if parent[u] == u: + num += 1 + num_components.append(num) + + if network_name is not None: + filename = f"{network_name}_connected_components_per_ts" + else: + filename = "_connected_components_per_ts" + plot_for_snapshots(num_components, filename, "Number of connected components", plot_path = plot_path) + print(num_components) + print("Plotting Done!") + + return + + +def size_connected_components(graph: list) -> List[Dict]: + r""" + Calculate the sizes of connected components per timestamp + Returns: + list: A list containing the sizes of connected components in each timestamp. + """ + component_sizes = [] + for t in range(len(graph)): + parent = list(range(graph[t].number_of_nodes)) + + for _, edge_data in graph[t].edgelist.items(): + for (u, v), _ in edge_data.items(): + _merge(u, v, parent) + + component_sizes_t = {} + for u in graph[t].nodes(): + root = _find(u, parent) + if root not in component_sizes_t: + component_sizes_t[root] = 0 + component_sizes_t[root] += 1 + + component_sizes.append(component_sizes_t) + + return component_sizes + + +def get_avg_node_engagement(graph_edgelist: dict) -> List[int]: + r""" + Calculate the average node engagement per timestamp, + the average number of distinct nodes that establish + at least one new connection. + Parameters: + graph_edgelist: Dictionary containing graph data + """ + engaging_nodes = [] + previous_edges = set() + for ts, e_list in graph_edgelist.items(): + node_set = set() + new_edges = {(u, v) for (u, v), _ in e_list.items() if frozenset({u, v}) not in previous_edges} + for u, v in new_edges: + if u not in node_set: + node_set.add(u) + if v not in node_set: + node_set.add(v) + engaging_nodes.append(len(node_set)) + previous_edges = {frozenset({u, v}) for (u, v), _ in e_list.items()} # Update the set of previous edges for the next timestamp + return engaging_nodes \ No newline at end of file diff --git a/tgx/utils/stat.py b/tgx/utils/stat.py index 616f678..d2a88ed 100644 --- a/tgx/utils/stat.py +++ b/tgx/utils/stat.py @@ -14,9 +14,8 @@ "get_num_unique_edges", "get_reoccurrence", "get_surprise", - "get_avg_node_activity", - "get_avg_node_engagement", - "degree_density"] + "get_novelty", + "get_avg_node_activity"] def average_degree_per_ts(graph: list, @@ -97,41 +96,6 @@ def nodes_and_edges_per_ts(graph: list, ts = list(range(0, len(graph))) return plot_nodes_edges_per_ts(edges, nodes, ts, network_name = network_name, plot_path = plot_path) - - -def connected_components_per_ts(graph: list, - network_name: str = None, - plot_path: str = None) -> None: - r""" - Plot number of connected components per timestamp - Parameters: - graph: a list containing graph snapshots - network_name: name of the graph to be used in the output file name - plot_path: path to save the output figure - """ - num_components = [] - for t in range(len(graph)): - parent = list(range(graph[t].number_of_nodes)) - - for _, edge_data in graph[t].edgelist.items(): - for (u, v), _ in edge_data.items(): - _merge(u, v, parent) - - num = 0 - for u in graph[t].nodes(): - if parent[u] == u: - num += 1 - num_components.append(num) - - if network_name is not None: - filename = f"{network_name}_connected_components_per_ts" - else: - filename = "_connected_components_per_ts" - plot_for_snapshots(num_components, filename, "Number of connected components", plot_path = plot_path) - print(num_components) - print("Plotting Done!") - - return def _calculate_average_degree_per_ts(graph, total_nodes): @@ -251,46 +215,6 @@ def _split_data_chronological(graph_edgelist, test_ratio): test_e_set[(u,v)] = freq return train_val_e_set, test_e_set -def _find(x, parent): - if parent[x] == x: - return x - parent[x] = _find(parent[x], parent) - return parent[x] - - -def _merge(x, y, parent): - root_x = _find(x, parent) - root_y = _find(y, parent) - - if root_x != root_y: - parent[root_x] = root_y - - -def size_connected_components(graph: list) -> List[Dict]: - r""" - Calculate the sizes of connected components per timestamp - Returns: - list: A list containing the sizes of connected components in each timestamp. - """ - component_sizes = [] - for t in range(len(graph)): - parent = list(range(graph[t].number_of_nodes)) - - for _, edge_data in graph[t].edgelist.items(): - for (u, v), _ in edge_data.items(): - _merge(u, v, parent) - - component_sizes_t = {} - for u in graph[t].nodes(): - root = _find(u, parent) - if root not in component_sizes_t: - component_sizes_t[root] = 0 - component_sizes_t[root] += 1 - - component_sizes.append(component_sizes_t) - - return component_sizes - def get_reoccurrence(graph_edgelist: dict, test_ratio: float=0.15) -> float: r""" @@ -398,67 +322,4 @@ def get_avg_node_activity(graph_edgelist: dict) -> float: avg_node_activity = float(np.sum(node_activity_ratio) * 1.0 / len(node_activity_ratio)) print(f"INFO: Node activity ratio: {avg_node_activity}") - return avg_node_activity - - -def get_avg_node_engagement(graph_edgelist: dict) -> List[int]: - r""" - Calculate the average node engagement per timestamp, - the average number of distinct nodes that establish - at least one new connection. - Parameters: - graph_edgelist: Dictionary containing graph data - """ - engaging_nodes = [] - previous_edges = set() - for ts, e_list in graph_edgelist.items(): - node_set = set() - new_edges = {(u, v) for (u, v), _ in e_list.items() if frozenset({u, v}) not in previous_edges} - for u, v in new_edges: - if u not in node_set: - node_set.add(u) - if v not in node_set: - node_set.add(v) - engaging_nodes.append(len(node_set)) - previous_edges = {frozenset({u, v}) for (u, v), _ in e_list.items()} # Update the set of previous edges for the next timestamp - return engaging_nodes - - -def degree_density(graph_edgelist: dict, k: int = 10, network_name: str = None, plot_path: str = None) -> None: - r""" - Plot density map of node degrees per time window - Parameters: - graph_edgelist: Dictionary containing graph data - k: number of time windows - network_name: name of the graph to be used in the output file name - plot_path: path to save the output figure - """ - degrees_by_k_list = [] - temp = [] - temp_idx = 0 - unique_ts = list(graph_edgelist.keys()) - for ts in unique_ts: - e_at_this_ts = graph_edgelist[ts] - G = nx.MultiGraph() - for e, repeat in e_at_this_ts.items(): - G.add_edge(e[0], e[1], weight=repeat) - nodes = G.nodes() - degrees = [G.degree[n] for n in nodes] - - if temp_idx