-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNetwork_analysis_tutorial.py
114 lines (80 loc) · 3.8 KB
/
Network_analysis_tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 12:20:07 2023
@author: paweljakuszyk
"""
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
def _read_csv_rows(path):
    """Return the data rows of the CSV file at *path*, without the header.

    The file is opened with ``newline=''`` as the ``csv`` module requires,
    so that quoted fields containing line breaks are parsed correctly.
    Completely empty rows (for example a trailing blank line) are dropped:
    they carry no data and would break the column indexing done on the
    rows further down.

    Raises:
        OSError: if the file cannot be opened.
    """
    # NOTE(review): assumes the tutorial CSV files are UTF-8 encoded; being
    # explicit avoids depending on the platform's default encoding -- confirm.
    with open(path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Discard the header row; tolerate an empty file.
        return [row for row in reader if row]


# Each node row is a list of strings; column 0 holds the node's name.
nodes = _read_csv_rows('/Users/paweljakuszyk/Documents/Network_analysis_tutorial_data/quakers_nodelist.csv')
node_names = [row[0] for row in nodes]  # Get a list of only the node names

# Each edge row becomes a tuple so it can be passed to G.add_edges_from().
edges = [
    tuple(row)
    for row in _read_csv_rows('/Users/paweljakuszyk/Documents/Network_analysis_tutorial_data/quakers_edgelist.csv')
]
# Assemble the network: register every name from the node list first, then
# add the edges between them, and print the resulting graph object.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)
print(G)

# Build one lookup table per metadata column of the node list. Every table
# is keyed by column 0 of the row (the node's name).
hist_sig_dict = {row[0]: row[1] for row in nodes}
gender_dict = {row[0]: row[2] for row in nodes}
birth_dict = {row[0]: row[3] for row in nodes}
death_dict = {row[0]: row[4] for row in nodes}
id_dict = {row[0]: row[5] for row in nodes}

# Attach each table to the graph under its attribute name.
for attr_name, attr_values in (
    ('historical_significance', hist_sig_dict),
    ('gender', gender_dict),
    ('birth_year', birth_dict),
    ('death_year', death_dict),
    ('sdfb_id', id_dict),
):
    nx.set_node_attributes(G, attr_values, attr_name)

# Print every node name next to the "birth_year" attribute stored on it.
for person in G.nodes():
    print(person, G.nodes[person]['birth_year'])
# Density of the whole network.
density = nx.density(G)
print("Network density:", density)

# Shortest route between two specific people. The returned path is a list
# of nodes, so the number of steps is one less than its length.
fell_whitehead_path = nx.shortest_path(
    G, source="Margaret Fell", target="George Whitehead"
)
print("Shortest path between Fell and Whitehead:", fell_whitehead_path)
path_length = len(fell_whitehead_path) - 1
print("Length of that path:", path_length)

# Prints False when the graph consists of more than one component.
print(nx.is_connected(G))

# Pick the component with the most nodes, take the subgraph induced by it,
# and measure the diameter on that subgraph rather than on all of G.
largest_component = max(nx.connected_components(G), key=len)
subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print("Network diameter of largest component:", diameter)

# Transitivity of the whole graph, reported as "triadic closure".
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)
# Record each node's degree in a dict and also as a node attribute.
degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree')

# Rank the (name, degree) pairs from highest to lowest degree.
sorted_degree = list(degree_dict.items())
sorted_degree.sort(key=itemgetter(1), reverse=True)

print("Top 20 nodes by degree:")
for name_and_degree in sorted_degree[:20]:
    print(name_and_degree)
# Compute both centrality measures, then store each one on the graph as a
# node attribute.
betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)
for attr_name, scores in (
    ('betweenness', betweenness_dict),
    ('eigenvector', eigenvector_dict),
):
    nx.set_node_attributes(G, scores, attr_name)

# Rank the (name, betweenness) pairs from highest to lowest and keep the
# first 20 for the two reports below.
sorted_betweenness = list(betweenness_dict.items())
sorted_betweenness.sort(key=itemgetter(1), reverse=True)
top_betweenness = sorted_betweenness[:20]

print("Top 20 nodes by betweenness centrality:")
for name_and_score in top_betweenness:
    print(name_and_score)

# Same 20 nodes again, this time with each node's degree looked up in
# degree_dict alongside its betweenness score.
for name, score in top_betweenness:
    print("Name:", name, "| Betweenness Centrality:", score, "| Degree:", degree_dict[name])