diff --git a/LICENSE.md b/LICENSE.md index ac2d4fcc2..aed60deb3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -31,3 +31,24 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--------------------------------- + +License + +Tensor methods for nonuniform hypergraphs + +* Tensor methods functionality for the CompleX Group Interactions library + +Copyright 2023, 2024 Battelle Memorial Institute + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/docs/source/api/algorithms/xgi.algorithms.centrality.rst b/docs/source/api/algorithms/xgi.algorithms.centrality.rst index d6463cec3..abc827059 100644 --- a/docs/source/api/algorithms/xgi.algorithms.centrality.rst +++ b/docs/source/api/algorithms/xgi.algorithms.centrality.rst @@ -9,6 +9,8 @@ xgi.algorithms.centrality .. autofunction:: clique_eigenvector_centrality .. autofunction:: h_eigenvector_centrality + .. autofunction:: z_eigenvector_centrality .. autofunction:: node_edge_centrality .. autofunction:: line_vector_centrality .. autofunction:: katz_centrality + .. autofunction:: uniform_h_eigenvector_centrality \ No newline at end of file diff --git a/docs/source/api/stats/xgi.stats.nodestats.rst b/docs/source/api/stats/xgi.stats.nodestats.rst index 7d9195039..24f9b9a05 100644 --- a/docs/source/api/stats/xgi.stats.nodestats.rst +++ b/docs/source/api/stats/xgi.stats.nodestats.rst @@ -9,12 +9,14 @@ .. autofunction:: attrs .. autofunction:: average_neighbor_degree - .. autofunction:: clique_eigenvector_centrality - .. autofunction:: clustering_coefficient .. autofunction:: degree + .. autofunction:: clique_eigenvector_centrality .. autofunction:: h_eigenvector_centrality - .. autofunction:: local_clustering_coefficient + .. autofunction:: z_eigenvector_centrality + .. autofunction:: katz_centrality .. autofunction:: node_edge_centrality + .. autofunction:: clustering_coefficient + .. 
autofunction:: local_clustering_coefficient .. autofunction:: two_node_clustering_coefficient .. autofunction:: local_simplicial_fraction .. autofunction:: local_edit_simpliciality diff --git a/docs/source/api/tutorials/case_studies.rst b/docs/source/api/tutorials/case_studies.rst index 7a8840576..f57400572 100644 --- a/docs/source/api/tutorials/case_studies.rst +++ b/docs/source/api/tutorials/case_studies.rst @@ -8,4 +8,5 @@ Case studies :maxdepth: 1 case_study_1 - case_study_2 \ No newline at end of file + case_study_2 + case_study_3 \ No newline at end of file diff --git a/docs/source/api/tutorials/case_study_3.nblink b/docs/source/api/tutorials/case_study_3.nblink new file mode 100644 index 000000000..92312b29c --- /dev/null +++ b/docs/source/api/tutorials/case_study_3.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../../tutorials/case_studies/comparing_centralities.ipynb" +} \ No newline at end of file diff --git a/docs/source/assets/images/Filtering_2024_Fig1.png b/docs/source/assets/images/Filtering_2024_Fig1.png deleted file mode 100644 index e5fc00714..000000000 Binary files a/docs/source/assets/images/Filtering_2024_Fig1.png and /dev/null differ diff --git a/docs/source/assets/images/Simpliciality_2023_Fig3.png b/docs/source/assets/images/Simpliciality_2023_Fig3.png deleted file mode 100644 index 9a0a620fd..000000000 Binary files a/docs/source/assets/images/Simpliciality_2023_Fig3.png and /dev/null differ diff --git a/docs/source/assets/images/XGI_2023_Fig2.png b/docs/source/assets/images/XGI_2023_Fig2.png deleted file mode 100644 index 3831c3c78..000000000 Binary files a/docs/source/assets/images/XGI_2023_Fig2.png and /dev/null differ diff --git a/docs/source/user_guides.rst b/docs/source/user_guides.rst index 3119a5050..94826d80e 100644 --- a/docs/source/user_guides.rst +++ b/docs/source/user_guides.rst @@ -59,39 +59,38 @@ User Guides To the in-depth tutorials .. grid:: - + .. 
grid-item-card:: :text-align: center - Cookbook + Case studies ^^^ - Recipes to solve specific tasks in a few lines - + To see how others have used XGI in their work +++ - .. button-ref:: api/tutorials/recipes + .. button-ref:: api/tutorials/case_studies :expand: :color: secondary :click-parent: - To the cookbook - + To the case studies + .. grid-item-card:: :text-align: center - Case studies + Cookbook ^^^ - To see how others have used XGI in their work + Recipes to solve specific tasks in a few lines + +++ - .. button-ref:: api/tutorials/case_studies + .. button-ref:: api/tutorials/recipes :expand: :color: secondary :click-parent: - To the case studies - + To the cookbook For all specifications and options of a particular function, or to explore all existing functions, see the `API Reference `_. \ No newline at end of file diff --git a/docs/source/using-xgi.rst b/docs/source/using-xgi.rst index dbd32d0d3..610b2f92a 100644 --- a/docs/source/using-xgi.rst +++ b/docs/source/using-xgi.rst @@ -12,6 +12,11 @@ Published work 2024 ---- +Sinan G. Aksoy, Ilya Amburg, and Stephen J. Young, "Scalable Tensor Methods for Nonuniform Hypergraphs", *SIAM Journal on Mathematics of Data Science*, Vol. 6, Iss. 2, 481-503 (2024). + +:bdg-link-primary-line:`Paper ` +:bdg-link-primary-line:`Code ` + Gonzalo Contreras-Aso, Regino Criado, and Miguel Romance, "Beyond directed hypergraphs: heterogeneous hypergraphs and spectral centralities", *Journal of Complex Networks*, Volume 12, Issue 4, cnae037 (2024). 
@pytest.mark.slow
def test_uniform_h_eigenvector_centrality():
    """Exercise `xgi.uniform_h_eigenvector_centrality`.

    Covers the degenerate inputs (empty, edgeless, disconnected), the
    closed-form sunflower ratio, rejection of non-uniform hypergraphs, and
    the warning emitted when the iteration budget is exhausted.
    """
    # test empty hypergraph
    H = xgi.Hypergraph()
    c = xgi.uniform_h_eigenvector_centrality(H)
    assert c == dict()

    # Test no edges
    H.add_nodes_from([0, 1, 2])
    hec = xgi.uniform_h_eigenvector_centrality(H)
    for i in hec:
        assert np.isnan(hec[i])

    # test disconnected
    H.add_edge([0, 1])
    hec = xgi.uniform_h_eigenvector_centrality(H)
    assert set(hec) == {0, 1, 2}
    for i in hec:
        assert np.isnan(hec[i])

    # sunflower: all petal nodes share one value and the core/petal ratio
    # matches the closed form returned by `_ratio`
    H = xgi.sunflower(3, 1, 5)
    c = xgi.uniform_h_eigenvector_centrality(H, max_iter=1000)
    c = np.array(list(c.values()))
    assert norm(c[1:] - c[1]) < 1e-4
    assert abs(c[0] / c[1] - _ratio(3, 5, kind="HEC")) < 1e-4

    H = xgi.sunflower(5, 1, 7)
    c = xgi.uniform_h_eigenvector_centrality(H, max_iter=1000)
    c = np.array(list(c.values()))
    assert norm(c[1:] - c[1]) < 1e-4
    assert abs(c[0] / c[1] - _ratio(5, 7, kind="HEC")) < 1e-4

    # non-uniform hypergraphs are rejected
    with pytest.raises(XGIError):
        H = xgi.Hypergraph([[1, 2], [2, 3, 4]])
        xgi.uniform_h_eigenvector_centrality(H)

    # non-convergence: the implementation signals this with `warn(...)`, not
    # with an exception, and only after the uniformity check has passed.
    # A uniform, connected hypergraph is therefore required to reach that
    # code path; one iteration from a random start cannot meet tol=1e-6.
    # NOTE(review): assumes `warn` in centrality.py is `warnings.warn`.
    H = xgi.sunflower(3, 1, 5)
    with pytest.warns(Warning):
        xgi.uniform_h_eigenvector_centrality(H, max_iter=1)
@pytest.mark.slow
def test_h_eigenvector_centrality():
    """Exercise `xgi.h_eigenvector_centrality` (non-uniform capable variant).

    Covers the degenerate inputs, the closed-form sunflower ratio for every
    petal node, a small non-uniform hypergraph with pinned values, and a
    label-preservation check on a real dataset.
    """
    # test empty hypergraph
    H = xgi.Hypergraph()
    c = xgi.h_eigenvector_centrality(H)
    assert c == dict()

    # Test no edges
    H.add_nodes_from([0, 1, 2])
    hec = xgi.h_eigenvector_centrality(H)
    for i in hec:
        assert np.isnan(hec[i])

    # test disconnected
    H.add_edge([0, 1])
    hec = xgi.h_eigenvector_centrality(H)
    assert set(hec) == {0, 1, 2}
    for i in hec:
        assert np.isnan(hec[i])

    # Sunflowers: node 0 is treated as the core (as in the other centrality
    # tests); every other node must show the same core/petal ratio.
    # Iterating over the returned keys avoids hard-coding the petal count.
    H = xgi.sunflower(3, 1, 5)
    c = xgi.h_eigenvector_centrality(H, max_iter=1000)
    expected = _ratio(3, 5, kind="HEC")
    assert max(abs(c[0] / c[n] - expected) for n in c if n != 0) < 1e-4

    H = xgi.sunflower(5, 1, 7)
    c = xgi.h_eigenvector_centrality(H, max_iter=1000)
    expected = _ratio(5, 7, kind="HEC")
    assert max(abs(c[0] / c[n] - expected) for n in c if n != 0) < 1e-4

    # non-uniform hypergraph: values pinned from a reference run
    H = xgi.Hypergraph([[1, 2], [2, 3, 4]])
    c = xgi.h_eigenvector_centrality(H)
    true_c = {
        1: 0.24458437592396465,
        2: 0.3014043407819482,
        3: 0.22700561916516002,
        4: 0.22700566412892714,
    }
    for i in c:
        assert np.allclose(c[i], true_c[i])

    # original node labels must survive the internal integer relabelling
    H = xgi.load_xgi_data("email-enron")
    H.cleanup(relabel=False)
    c = xgi.h_eigenvector_centrality(H)
    assert sorted(c) == sorted(H.nodes)
H.add_edge([0, 1]) + hec = xgi.z_eigenvector_centrality(H) + assert set(hec) == {0, 1, 2} + for i in hec: + assert np.isnan(hec[i]) + + H = xgi.sunflower(3, 1, 5) + c = H.nodes.z_eigenvector_centrality(max_iter=1000).asdict() + assert ( + max([abs(c[0] / c[i + 1] - _ratio(3, 5, kind="ZEC")) for i in range(12)]) < 1e-4 + ) + + H = xgi.sunflower(5, 1, 7) + print(H.num_nodes) + c = xgi.z_eigenvector_centrality(H, max_iter=1000) + assert ( + max([abs(c[0] / c[i + 1] - _ratio(5, 7, kind="ZEC")) for i in range(29)]) < 1e-4 + ) + + H = xgi.Hypergraph([[1, 2], [2, 3, 4]]) + c = xgi.z_eigenvector_centrality(H, max_iter=10000) + true_c = { + 1: 0.45497398635982933, + 2: 0.45900452108663403, + 3: 0.04301074627676834, + 4: 0.04301074627676829, + } + for i in c: + assert np.allclose(c[i], true_c[i]) + + +def _ratio(r, m, kind="CEC"): + """Generate the _ratio between largest and second largest centralities + for the sunflower hypergraph with one core node. + + Parameters + ---------- + r : int + Number of petals + m : int + Size of edges + kind : str, default: "CEC" + "CEC" or "HEC" + + Returns + ------- + float + Ratio + + References + ---------- + Three Hypergraph Eigenvector Centralities, + Austin R. Benson, + https://doi.org/10.1137/18M1203031 + """ + if kind == "CEC": + return 2 * r * (m - 1) / (np.sqrt(m**2 + 4 * (m - 1) * (r - 1)) + m - 2) + elif kind == "HEC": + return r ** (1.0 / m) + elif kind == "ZEC": + return r**0.5 diff --git a/tutorials/case_studies/comparing_centralities.ipynb b/tutorials/case_studies/comparing_centralities.ipynb new file mode 100644 index 000000000..acc22d758 --- /dev/null +++ b/tutorials/case_studies/comparing_centralities.ipynb @@ -0,0 +1,134 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparing centrality measures\n", + "\n", + "XGi has several different centrality measures. How do they stack up against one another? We were curious too! 
Below is a pairplot comparing every centrality to each other for a selected hypergraph." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import xgi\n", + "import seaborn as sns\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = \"email-enron\"\n", + "\n", + "\n", + "H = xgi.load_xgi_data(dataset)\n", + "H.cleanup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we compute different measures of centrality on the hypergraph:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "c1 = H.nodes.clique_eigenvector_centrality.asnumpy()\n", + "c2 = H.nodes.h_eigenvector_centrality(max_iter=1000).asnumpy()\n", + "c3 = H.nodes.z_eigenvector_centrality(max_iter=1000).asnumpy()\n", + "c4 = H.nodes.katz_centrality.asnumpy()\n", + "c5 = H.nodes.node_edge_centrality(max_iter=1000).asnumpy()\n", + "\n", + "df = pd.DataFrame()\n", + "# df[\"node-edge\"] = c1\n", + "df[\"CEC\"] = c1\n", + "df[\"HEC\"] = c2\n", + "df[\"ZEC\"] = c3\n", + "df[\"Katz\"] = c4\n", + "df[\"Node-Edge\"] = c5" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.pairplot(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "xgi", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py index 37c7a1dc4..57272d6d4 100644 --- a/xgi/algorithms/centrality.py +++ b/xgi/algorithms/centrality.py @@ -10,15 +10,18 @@ from ..convert import to_line_graph from ..exception import XGIError from ..linalg import clique_motif_matrix, incidence_matrix -from ..utils import convert_labels_to_integers +from ..utils import convert_labels_to_integers, pairwise_incidence, ttsv1, ttsv2 +from .connected import is_connected from .properties import is_uniform __all__ = [ "clique_eigenvector_centrality", "h_eigenvector_centrality", + "z_eigenvector_centrality", "node_edge_centrality", "line_vector_centrality", "katz_centrality", + "uniform_h_eigenvector_centrality", ] @@ -41,6 +44,7 @@ def clique_eigenvector_centrality(H, tol=1e-6): See Also -------- h_eigenvector_centrality + z_eigenvector_centrality References ---------- @@ -48,8 +52,6 @@ def clique_eigenvector_centrality(H, tol=1e-6): Austin R. 
Benson, https://doi.org/10.1137/18M1203031 """ - from ..algorithms import is_connected - # if there aren't any nodes, return an empty dict if H.num_nodes == 0: return dict() @@ -65,101 +67,6 @@ def clique_eigenvector_centrality(H, tol=1e-6): return {node_dict[n]: v[n].item() for n in node_dict} -def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): - """Compute the H-eigenvector centrality of a uniform hypergraph. - - Parameters - ---------- - H : Hypergraph - The hypergraph of interest. - max_iter : int, optional - The maximum number of iterations before the algorithm terminates. - By default, 100. - tol : float > 0, optional - The desired L2 error in the centrality vector. By default, 1e-6. - - Returns - ------- - dict - Centrality, where keys are node IDs and values are centralities. The - centralities are 1-normalized. - - Raises - ------ - XGIError - If the hypergraph is not uniform. - - See Also - -------- - clique_eigenvector_centrality - - References - ---------- - Three Hypergraph Eigenvector Centralities, - Austin R. Benson, - https://doi.org/10.1137/18M1203031 - """ - from ..algorithms import is_connected - - # if there aren't any nodes, return an empty dict - if H.num_nodes == 0: - return dict() - # if the hypergraph is not connected, - # this metric doesn't make sense and should return nan. 
- if not is_connected(H): - return {n: np.nan for n in H.nodes} - - m = is_uniform(H) - if not m: - raise XGIError("This method is not defined for non-uniform hypergraphs.") - - new_H = convert_labels_to_integers(H, "old-label") - - f = lambda v, m: np.power(v, 1.0 / m) # noqa: E731 - g = lambda v, x: np.prod(v[list(x)]) # noqa: E731 - - x = np.random.uniform(size=(new_H.num_nodes)) - x = x / norm(x, 1) - - for iter in range(max_iter): - new_x = apply(new_H, x, g) - new_x = f(new_x, m) - # multiply by the sign to try and enforce positivity - new_x = np.sign(new_x[0]) * new_x / norm(new_x, 1) - if norm(x - new_x) <= tol: - break - x = new_x.copy() - else: - warn("Iteration did not converge!") - return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, new_x)} - - -def apply(H, x, g=lambda v, e: np.sum(v[list(e)])): - """Apply a vector to the hypergraph given a function. - - Parameters - ---------- - H : Hypergraph - Hypergraph of interest. - x : 1D numpy array - 1D vector - g : lambda function, optional - function to apply. By default, sum. - - Returns - ------- - 1D numpy array - vector post application - """ - new_x = np.zeros(H.num_nodes) - for edge in H.edges.members(): - edge = list(edge) - # ordered permutations - for shift in range(len(edge)): - new_x[edge[shift]] += g(x, edge[shift + 1 :] + edge[:shift]) - return new_x - - def node_edge_centrality( H, f=lambda x: np.power(x, 2), @@ -169,7 +76,7 @@ def node_edge_centrality( max_iter=100, tol=1e-6, ): - """Computes the node and edge centralities + r"""Computes the node and edge centralities Parameters ---------- @@ -214,15 +121,10 @@ def node_edge_centrality( Francesco Tudisco & Desmond J. Higham, https://doi.org/10.1038/s42005-021-00704-2 """ - from ..algorithms import is_connected - - # if there aren't any nodes or edges, return an empty dict + # if the hypergraph is not connected or is empty, + # this metric doesn't make sense and should return nan. 
if H.num_nodes == 0 or H.num_edges == 0 or not is_connected(H): return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} - # if the hypergraph is not connected, - # this metric doesn't make sense and should return nan. - # if not is_connected(H): - # return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} n = H.num_nodes m = H.num_edges @@ -233,9 +135,9 @@ def node_edge_centrality( check = np.inf - for iter in range(max_iter): - u = np.multiply(x, g(I @ f(y))) - v = np.multiply(y, psi(I.T @ phi(x))) + for it in range(max_iter): + u = (x * g(I @ f(y))) ** 0.5 + v = (y * psi(I.T @ phi(x))) ** 0.5 # multiply by the sign to try and enforce positivity new_x = np.sign(u[0]) * u / norm(u, 1) new_y = np.sign(v[0]) * v / norm(v, 1) @@ -273,8 +175,6 @@ def line_vector_centrality(H): https://doi.org/10.1016/j.chaos.2022.112397 """ - from ..algorithms import is_connected - # If the hypergraph is empty, then return an empty dictionary if H.num_nodes == 0: return dict() @@ -392,3 +292,272 @@ def katz_centrality(H, cutoff=100): c *= 1 / norm(c, 1) nodedict = dict(zip(range(n), H.nodes)) return {nodedict[idx]: c[idx] for idx in nodedict} + + +def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): + """Compute the H-eigenvector centrality of a hypergraph. + + The H-eigenvector terminology comes from Qi (2005) which + defines a "tensor H-eigenpair". + + Parameters + ---------- + H : Hypergraph + The hypergraph of interest. + max_iter : int, optional + The maximum number of iterations before the algorithm terminates. + By default, 100. + tol : float > 0, optional + The desired convergence tolerance. By default, 1e-6. + + Returns + ------- + dict + Centrality, where keys are node IDs and values are centralities. The + centralities are 1-normalized. 
def z_eigenvector_centrality(H, max_iter=100, tol=1e-6):
    """Compute the Z-eigenvector centrality of a hypergraph.

    The Z-eigenvector terminology comes from Qi (2005) which
    defines a "tensor Z-eigenpair".

    Parameters
    ----------
    H : Hypergraph
        The hypergraph of interest.
    max_iter : int, optional
        The maximum number of iterations before the algorithm terminates.
        By default, 100.
    tol : float > 0, optional
        The desired convergence tolerance, compared against the relative
        spread of the element-wise ratio between successive iterates.
        By default, 1e-6.

    Returns
    -------
    dict
        Centrality, where keys are node IDs and values are centralities. The
        centralities are 1-normalized. If the hypergraph has no nodes an
        empty dict is returned; if it is not connected every value is NaN.

    Warns
    -----
    Emits "Iteration did not converge!" when `max_iter` is exhausted.

    See Also
    --------
    clique_eigenvector_centrality
    h_eigenvector_centrality

    References
    ----------
    Scalable Tensor Methods for Nonuniform Hypergraphs,
    Sinan Aksoy, Ilya Amburg, Stephen Young,
    https://doi.org/10.1137/23M1584472

    Three Hypergraph Eigenvector Centralities,
    Austin R. Benson,
    https://doi.org/10.1137/18M1203031

    Liqun Qi
    "Eigenvalues of a real supersymmetric tensor"
    Journal of Symbolic Computation, **40**, *6* (2005).
    https://doi.org/10.1016/j.jsc.2005.05.007.
    """
    # if there aren't any nodes, return an empty dict
    num_nodes = H.num_nodes
    if num_nodes == 0:
        return dict()

    # if the hypergraph is not connected,
    # this metric doesn't make sense and should return nan.
    if not is_connected(H):
        return {node: np.nan for node in H.nodes}

    new_H = convert_labels_to_integers(H, "old-label")
    edge_dict = new_H.edges.members(dtype=dict)
    # Maximum edge size, computed once on the relabelled hypergraph that all
    # the other inputs are derived from (relabelling leaves sizes unchanged).
    r = new_H.edges.size.max()
    pairs_dict = pairwise_incidence(edge_dict, r)

    def leading_eigenvector(A):
        """Return the 1-normalized eigenvector of A for its largest-magnitude
        eigenvalue, with the sign chosen so the first entry is non-negative."""
        _, v = eigsh(A, k=1, which="LM", tol=1e-5, maxiter=200)
        evec = np.array(v[:, 0])
        if evec[0] < 0:
            evec = -evec
        return evec / norm(evec, 1)

    def f(u):
        # Update direction: leading eigenvector of the matrix produced by
        # ttsv2 at u, minus u itself.
        return leading_eigenvector(ttsv2(pairs_dict, edge_dict, r, u, num_nodes)) - u

    x = np.ones(num_nodes) / num_nodes

    h = 0.5  # step size of the explicit update x <- x + h * f(x)
    for _ in range(max_iter):
        x_new = x + h * f(x)
        # Converged once x_new is (nearly) a scalar multiple of x.
        s = x_new / x
        if (np.max(s) - np.min(s)) / np.min(s) < tol:
            break
        x = x_new
    else:
        warn("Iteration did not converge!")
    return {
        new_H.nodes[n]["old-label"]: c.item()
        for n, c in zip(new_H.nodes, x / norm(x, 1))
    }
def apply(H, x, g=lambda v, e: np.sum(v[list(e)])):
    """Accumulate, for every node, a function of its co-members' values.

    For each edge and each node in it, `g` is evaluated on `x` and the list
    of the *other* nodes of that edge (rotated so that the nodes following
    the current one come first), and the result is added to that node's
    entry.

    Parameters
    ----------
    H : Hypergraph
        Hypergraph of interest. Node IDs are used directly as indices into
        the output array.
    x : 1D numpy array
        1D vector
    g : lambda function, optional
        function to apply, called as ``g(x, other_nodes)``. By default, sum.

    Returns
    -------
    1D numpy array
        vector post application
    """
    result = np.zeros(H.num_nodes)
    for members in H.edges.members():
        nodes = list(members)
        for pos, node in enumerate(nodes):
            # everything after `node`, then everything before it
            others = nodes[pos + 1 :] + nodes[:pos]
            result[node] += g(x, others)
    return result
+ tol : float > 0, default: 1e-6 + The desired L2 error in the centrality vector. + + Returns + ------- + dict + Centrality, where keys are node IDs and values are centralities. + + References + ---------- + Three Hypergraph Eigenvector Centralities, + Austin R. Benson, + https://doi.org/10.1137/18M1203031 + + Liqun Qi + "Eigenvalues of a real supersymmetric tensor" + Journal of Symbolic Computation, **40**, *6* (2005). + https://doi.org/10.1016/j.jsc.2005.05.007. + """ + c = xgi.z_eigenvector_centrality(net, max_iter, tol) + return {n: c[n] for n in c if n in bunch} + + def node_edge_centrality( net, bunch, @@ -383,7 +433,7 @@ def node_edge_centrality( max_iter=100, tol=1e-6, ): - """Computes node centralities. + """Computes nonlinear node-edge centralities. Parameters ---------- @@ -436,7 +486,7 @@ def node_edge_centrality( def katz_centrality(net, bunch, cutoff=100): - """Compute the H-eigenvector centrality of a hypergraph. + r"""Compute the Katz centrality of a hypergraph. Parameters ---------- diff --git a/xgi/utils/__init__.py b/xgi/utils/__init__.py index 570aa121e..11ace38e6 100644 --- a/xgi/utils/__init__.py +++ b/xgi/utils/__init__.py @@ -1,3 +1,4 @@ -from . import trie, utilities +from . 
import tensor, trie, utilities +from .tensor import * from .trie import * from .utilities import * diff --git a/xgi/utils/tensor.py b/xgi/utils/tensor.py new file mode 100644 index 000000000..2742301c6 --- /dev/null +++ b/xgi/utils/tensor.py @@ -0,0 +1,333 @@ +## Tensor times same vector in all but one (TTSV1) and all but two (TTSV2) +from collections import defaultdict +from itertools import combinations +from math import factorial + +import numpy as np +from numpy import prod +from scipy.signal import convolve +from scipy.sparse import coo_array +from scipy.special import binom as binomial + +__all__ = [ + "pairwise_incidence", + "ttsv1", + "ttsv2", +] + + +def pairwise_incidence(edge_dict, max_size): + """Create pairwise incidence dictionary from hyperedge list dictionary + + Parameters + ---------- + edge_dict : dict + edge IDs are keys, edges are values + max_size : int + the size of the largest edge in the hypergraph + + Returns + ------- + pairs : dict + a dictionary with node pairs as keys and the hyperedges they appear in as values + """ + pairs = defaultdict(set) + for e, edge in edge_dict.items(): + for i, j in combinations(sorted(edge), 2): + pairs[(i, j)].add(e) + for n in edge: + pairs[(n, n)].add(e) + + if len(edge) < max_size: + for n in edge: + pairs[(n, n)].add(e) + return pairs + + +def banerjee_coeff(size, max_size): + r"""Return the Banerjee alpha coefficient + + This coefficient measures the size of the set of edge blowups + defined in the corresponding references below. For example, + for the edge :math:`e=\{1, 3\}` in a rank 3 hypergraph, we have + the following blowup. + + .. 
math:: + \beta(e) = \{1, 1, 3\}, \{1, 3, 1\}, \{1, 3, 3\}, \{3, 1, 1\}, \{3, 1, 3\}, \{3, 3, 1\} + + Parameters + ---------- + size : int + size of given hyperedge + max_size : int + maximum hyperedge size + + Returns + ------- + float + the Banerjee coefficient + + References + ---------- + Anirban Banerjee, Arnab Char, and Bibhash Mondal, + "Spectra of general hypergraphs" + Linear Algebra and its Applications, **518**, 14-30 (2017), + https://doi.org/10.1016/j.laa.2016.12.022 + + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.1137/23M1584472 + """ + return sum( + ((-1) ** j) * binomial(size, j) * (size - j) ** max_size + for j in range(size + 1) + ) + + +def ttsv1(node_dict, edge_dict, r, a): + """Computes the tensor times same vector in all modes but 1. + + This method uses generating functions as described in the corresponding reference. + + Parameters + ---------- + node_dict : dict + A dictionary with nodes as keys and hyperedges they appear in + as values. + edge_dict : dict + A dictionary with edges as keys and nodes which are members as + values. + r : int + maximum hyperedge size + a : NumPy array + the vector to multiply the tensor by. + + Returns + ------- + NumPy array + The tensor multiplied by the vector in all modes but 1. 
+ + See Also + -------- + ttsv2 + + References + ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.1137/23M1584472 + """ + n = len(node_dict) + s = np.zeros(n) + r_minus_1_factorial = factorial(r - 1) + for node, edges in node_dict.items(): + c = 0 + for e in edges: + l = len(edge_dict[e]) + alpha = banerjee_coeff(l, r) + edge_without_node = [v for v in edge_dict[e] if v != node] + if l == r: + gen_fun_coef = prod(a[edge_without_node]) + elif 2 ** (l - 1) < r * (l - 1): + gen_fun_coef = _get_gen_coef_subset_expansion( + a[edge_without_node], a[node], r - 1 + ) + else: + gen_fun_coef = _get_gen_coef_fft_fast_array( + edge_without_node, a, node, l, r + ) + c += r_minus_1_factorial * l * gen_fun_coef / alpha + s[node] = c + return s + + +def ttsv2(pair_dict, edge_dict, r, a, n): + """Computes the tensor times same vector in all modes but 2. + + Parameters + ---------- + pair_dict : dict + A dictionary with node pairs as keys and hyperedges they appear in + as values. + edge_dict : dict + A dictionary with edges as keys and nodes which are members as + values. + r : int + maximum hyperedge size + a : NumPy array + the vector to multiply the tensor by. + n : int + Number of nodes + + Returns + ------- + Scipy sparse array + A 2D array, which is the result of the tensor + multiplied by the vector in all modes but 2. 
+ + See Also + -------- + ttsv1 + + References + ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.1137/23M1584472 + """ + s = {} + r_minus_2_factorial = factorial(r - 2) + for (v1, v2), edges in pair_dict.items(): + c = 0 + for e in edges: + l = len(edge_dict[e]) + alpha = banerjee_coeff(l, r) + edge_without_node = [v for v in edge_dict[e] if v != v1 and v != v2] + if v1 != v2: + if 2 ** (l - 2) < (r - 2) * (l - 2): + gen_fun_coef = _get_gen_coef_subset_expansion( + a[edge_without_node], a[v1] + a[v2], r - 2 + ) + else: + coefs = [1] + for i in range(1, r - 1): + coefs.append(coefs[-1] * (a[v1] + a[v2]) / i) + coefs = np.array(coefs) + for u in edge_dict[e]: + if u != v1 and u != v2: + _coefs = [1] + for i in range(1, r - l + 2): + _coefs.append(_coefs[-1] * a[u] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0 : r - 1] + gen_fun_coef = coefs[-1] + else: + if 2 ** (l - 1) < (r - 2) * (l - 1): + gen_fun_coef = _get_gen_coef_subset_expansion( + a[edge_without_node], a[v1], r - 2 + ) + else: + coefs = [1] + for i in range(1, r - 1): + coefs.append(coefs[-1] * (a[v1]) / i) + coefs = np.array(coefs) + for u in edge_dict[e]: + if u != v1 and u != v2: + _coefs = [1] + for i in range(1, r - l + 1): + _coefs.append(_coefs[-1] * a[v1] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0 : r - 1] + gen_fun_coef = coefs[-1] + c += r_minus_2_factorial * l * gen_fun_coef / alpha + s[(v1, v2)] = c + if v1 == v2: + s[(v1, v2)] /= 2 + first = np.zeros(len(s)) + second = np.zeros(len(s)) + value = np.zeros(len(s)) + for i, k in enumerate(s.keys()): + first[i] = k[0] + second[i] = k[1] + value[i] = s[k] + Y = coo_array((value, (first, second)), (n, n)) + return Y + Y.T + + +## Helper functions for the tensor methods. 
+ + +def _get_gen_coef_subset_expansion(edge_values, node_value, r): + """Computes the generating funciton coefficient of order r using subset expansion. + + Parameters + ---------- + edge_values : NumPy array + Array of values from the `a` vector corresponding to + nodes in the hyperedge. + node_value : float + The value in a corresponding to the node being processed. + r : int + Desired order to get coefficient for. + + Returns + ------- + float + Generating function coefficient of order r. + + See Also + -------- + _get_gen_coef_fft_fast_array + + References + ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.1137/23M1584472 + """ + k = len(edge_values) + subset_vector = [0] + subset_lengths = [0] + for i in range(k): + for t in range(len(subset_vector)): + subset_vector.append(subset_vector[t] + edge_values[i]) + subset_lengths.append(subset_lengths[t] + 1) + for i in range(len(subset_lengths)): + subset_lengths[i] = (-1) ** (k - subset_lengths[i]) + total = sum( + [ + (node_value + subset_vector[i]) ** r * subset_lengths[i] + for i in range(len(subset_lengths)) + ] + ) + return total / factorial(r) + + +def _get_gen_coef_fft_fast_array(edge_without_node, a, node, l, r): + """Computes the generating funciton coefficient of order r using the Fast Fourier Transform. + + Parameters + ---------- + edge_without_node : list + Array of node indices corresponding to + all nodes in the hyperedge but the one being processed. + a : NumPy array + The vector to multiply the tensor by. + node : int + The index of the node being processed. + l : int + Number of nodes in the hyperedge. + r : int + Desired order to get coefficient for. + + Returns + ------- + float + Generating function coefficient of order r. 
+ + See Also + -------- + _get_gen_coef_subset_expansion + + References + ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.1137/23M1584472 + """ + coefs = [1] + for i in range(1, r): + coefs.append(coefs[-1] * a[node] / i) + coefs = np.array(coefs) + for u in edge_without_node: + _coefs = [1] + for i in range(1, r - l + 2): + _coefs.append(_coefs[-1] * a[u] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0:r] + gen_fun_coef = coefs[-1] + print("hi") + return gen_fun_coef