From 96c562d47e56deeaffb519a90247bde13f1e075e Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 1 Aug 2019 16:52:20 -0400 Subject: [PATCH] Use N or N-1 instead of empirical kmax in degree histogram Tweaks `DegreeDivergence` to use N (if there are self-loops) or N-1 (if there are not) instead of the largest observed degree when constructing the degree histogram. This works because a `Counter` behaves like a `defaultdict(int)`, so the list comprehension ``` hist = np.array([counter[v] for v in range(max_deg)]) ``` pads out the zeros automatically. It should rarely affect results, but has the advantages of (i) being slightly more accurate for the pedants among us and (ii) being faster for some reason. And while it doesn't affect the JSD, if we ever do implement #174, maybe it ends up mattering somewhere else. --- netrd/distance/degree_divergence.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/netrd/distance/degree_divergence.py b/netrd/distance/degree_divergence.py index 45b9d4df..0a3588a0 100644 --- a/netrd/distance/degree_divergence.py +++ b/netrd/distance/degree_divergence.py @@ -48,7 +48,10 @@ def degree_vector_histogram(graph): """ vec = np.array(list(dict(graph.degree()).values())) - max_deg = max(vec) + if next(graph.selfloop_edges(), False): + max_deg = len(graph) + else: + max_deg = len(graph) - 1 counter = Counter(vec) hist = np.array([counter[v] for v in range(max_deg)]) return vec, hist