Add Matrix.from_edgelist and matrix.to_edgelist (python-graphblas…

…#374) * Add `Matrix.from_edgelist` and `matrix.to_edgelist` * Also allow `from_edgelist` to accept `(row, column, value)` triples * Add `Vector.from_pairs` that is similar to `Matrix.from_edgelist` * Move flake8 config to `.flake8` and remove `setup.cfg` * Don't allow `values` to be in NumPy arrays in `from_edgelist`
jim22k · Jan 25, 2023 · 2078110 · 2078110
1 parent 436e329
commit 2078110
Show file tree

Hide file tree

Showing 12 changed files with 296 additions and 13 deletions.
diff --git a/setup.cfg → .flake8 b/setup.cfg → .flake8
@@ -1,6 +1,3 @@
-[aliases]
-test=pytest
-
 [flake8]
 max-line-length = 100
 inline-quotes = "
@@ -17,9 +14,3 @@ per-file-ignores =
     graphblas/tests/*.py:T201,
     graphblas/core/ss/matrix.py:SIM113,
     graphblas/**/__init__.py:F401,
-
-[build_sphinx]
-all-files = 1
-source-dir = docs
-build-dir = docs/_build
-warning-is-error = 1
diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml
@@ -290,7 +290,7 @@ jobs:
           rm script.py
           # Tests whose coverage depend on order of tests :/
           # TODO: understand why these are order-dependent and try to fix
-          coverage run -a -m pytest --color=yes -x --no-mapnumpy -k test_binaryop_attributes_numpy graphblas/tests/test_op.py
+          coverage run -a -m pytest --color=yes -x --no-mapnumpy --runslow -k test_binaryop_attributes_numpy graphblas/tests/test_op.py
           # coverage run -a -m pytest --color=yes -x --no-mapnumpy -k test_npmonoid graphblas/tests/test_numpyops.py --runslow
       - name: Auto-generated code check
         if: matrix.slowtask == 'pytest_bizarro'

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -56,7 +56,7 @@ repos:
         # These versions need updated manually
         - flake8==6.0.0
         - flake8-comprehensions==3.10.1
-        - flake8-bugbear==23.1.17
+        - flake8-bugbear==23.1.20
         - flake8-simplify==0.19.3
   - repo: https://github.com/asottile/yesqa
     rev: v1.4.0
@@ -71,10 +71,9 @@ repos:
         additional_dependencies: [tomli]
         files: ^(graphblas|docs)/
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.229
+    rev: v0.0.233
     hooks:
       - id: ruff
-        args: [--force-exclude]
   - repo: https://github.com/sphinx-contrib/sphinx-lint
     rev: v0.6.7
     hooks:

diff --git a/graphblas/core/automethods.py b/graphblas/core/automethods.py
@@ -269,6 +269,10 @@ def to_dicts(self):
     return self._get_value("to_dicts")
 
 
+def to_edgelist(self):
+    return self._get_value("to_edgelist")  # same delegation pattern as to_dicts / to_values
+
+
 def to_values(self):
     return self._get_value("to_values")
 
@@ -409,6 +413,7 @@ def _main():
         "to_dcsc",
         "to_dcsr",
         "to_dicts",
+        "to_edgelist",
     }
     common_raises = set()
     scalar_raises = {

diff --git a/graphblas/core/infix.py b/graphblas/core/infix.py
@@ -344,6 +344,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts):
     to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc))
     to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr))
     to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts))
+    to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist))
     to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values))
     wait = wrapdoc(Matrix.wait)(property(automethods.wait))
     # These raise exceptions

diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py
@@ -494,6 +494,11 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True)
             If internally stored rowwise, the sorting will be first by rows, then by column.
             If internally stored columnwise, the sorting will be first by column, then by row.
 
+        See Also
+        --------
+        to_edgelist
+        from_coo
+
         Returns
         -------
         np.ndarray[dtype=uint64] : Rows
@@ -543,6 +548,35 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True)
             c_values if values else None,
         )
 
+    def to_edgelist(self, dtype=None, *, values=True, sort=True):
+        """Extract the (row, column) index pairs and values as a 2-tuple of numpy arrays.
+
+        This calls ``to_coo`` then stacks rows and columns side by side into one 2-column array.
+
+        Parameters
+        ----------
+        dtype : optional
+            Requested dtype for the output values array.
+        values : bool, default=True
+            Whether to return values; will return ``None`` for values if ``False``.
+        sort : bool, default=True
+            Whether to require sorted indices.
+            If internally stored rowwise, the sorting will be first by rows, then by column.
+            If internally stored columnwise, the sorting will be first by column, then by row.
+
+        See Also
+        --------
+        to_coo
+        from_edgelist
+
+        Returns
+        -------
+        np.ndarray[dtype=uint64] : Edgelist of shape (n, 2); each row is a ``(row, column)`` pair
+        np.ndarray or None : Values (``None`` if ``values=False``)
+        """
+        rows, columns, values = self.to_coo(dtype, values=values, sort=sort)
+        return (np.column_stack([rows, columns]), values)
+
     def build(self, rows, columns, values, *, dup_op=None, clear=False, nrows=None, ncols=None):
         """Rarely used method to insert values into an existing Matrix.
 
@@ -827,6 +861,11 @@ def from_coo(
         name : str, optional
             Name to give the Matrix.
 
+        See Also
+        --------
+        from_edgelist
+        to_coo
+
         Returns
         -------
         Matrix
@@ -864,6 +903,99 @@ def from_coo(
             C.build(rows, columns, values, dup_op=dup_op)
         return C
 
+    @classmethod
+    def from_edgelist(
+        cls,
+        edgelist,
+        values=None,
+        dtype=None,
+        *,
+        nrows=None,
+        ncols=None,
+        dup_op=None,
+        name=None,
+    ):
+        """Create a new Matrix from edgelist of (row, col) pairs or (row, col, value) triples.
+
+        This transforms the data and calls ``Matrix.from_coo``.
+
+        Parameters
+        ----------
+        edgelist : list or np.ndarray or iterable
+            A sequence of ``(row, column)`` pairs or ``(row, column, value)`` triples.
+            NumPy edgelist only supports ``(row, column)``; values must be passed separately.
+        values : list or np.ndarray or scalar, optional
+            List of values. If a scalar is provided, all values will be set to this single value.
+            Defaults to 1.0 for pairs; must be omitted when ``edgelist`` contains triples.
+        dtype :
+            Data type of the Matrix. If not provided, the values will be inspected
+            to choose an appropriate dtype.
+        nrows : int, optional
+            Number of rows in the Matrix. If not provided, ``nrows`` is computed
+            from the maximum row index found in the edgelist.
+        ncols : int, optional
+            Number of columns in the Matrix. If not provided, ``ncols`` is computed
+            from the maximum column index found in the edgelist.
+        dup_op : :class:`~graphblas.core.operator.BinaryOp`, optional
+            Function used to combine values if duplicate indices are found.
+            Leaving ``dup_op=None`` will raise an error if duplicates are found.
+        name : str, optional
+            Name to give the Matrix.
+
+        See Also
+        --------
+        from_coo
+        to_edgelist
+
+        Returns
+        -------
+        Matrix
+        """
+        edgelist_values = None  # filled only when edgelist items are (row, column, value) triples
+        if isinstance(edgelist, np.ndarray):
+            if edgelist.ndim != 2:
+                raise ValueError(
+                    f"edgelist array must have 2 dimensions (nvals x 2); got {edgelist.ndim}"
+                )
+            if edgelist.shape[1] == 3:
+                raise ValueError(
+                    "edgelist as NumPy array only supports ``(row, column)``; "
+                    "values must be passed separately."
+                )
+            if edgelist.shape[1] != 2:
+                raise ValueError(
+                    "Last dimension of edgelist array must be length 2 "
+                    f"(for row and column); got {edgelist.shape[1]}"
+                )
+            rows = edgelist[:, 0]
+            cols = edgelist[:, 1]
+        else:
+            unzipped = list(zip(*edgelist))  # transpose; zip truncates ragged items to the shortest
+            if len(unzipped) == 2:
+                rows, cols = unzipped
+            elif len(unzipped) == 3:
+                rows, cols, edgelist_values = unzipped
+            elif not unzipped:
+                # Empty edgelist (nrows and ncols should be given)
+                rows = cols = unzipped
+            else:
+                raise ValueError(
+                    "Each item in the edgelist must have two or three elements "
+                    f"(for row and column index, and maybe values); got {len(unzipped)}"
+                )
+        if values is None:
+            if edgelist_values is None:
+                values = 1.0
+            else:
+                values = edgelist_values
+        elif edgelist_values is not None:
+            raise TypeError(
+                "Too many sources of values: from `edgelist` triples and from `values=` argument"
+            )
+        return cls.from_coo(
+            rows, cols, values, dtype, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name
+        )
+
     @classmethod
     def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
         if fmt is _CSR_FORMAT:
@@ -987,6 +1119,7 @@ def from_csr(
         from_coo
         from_csc
         from_dcsr
+        to_csr
         Matrix.ss.import_csr
         io.from_scipy_sparse
         """
@@ -1033,6 +1166,7 @@ def from_csc(
         from_coo
         from_csr
         from_dcsc
+        to_csc
         Matrix.ss.import_csc
         io.from_scipy_sparse
         """
@@ -1092,6 +1226,7 @@ def from_dcsr(
         from_coo
         from_csr
         from_dcsc
+        to_dcsr
         Matrix.ss.import_hypercsr
         io.from_scipy_sparse
         """
@@ -1175,6 +1310,7 @@ def from_dcsc(
         from_coo
         from_csc
         from_dcsr
+        to_dcsc
         Matrix.ss.import_hypercsc
         io.from_scipy_sparse
         """
@@ -1265,6 +1401,10 @@ def from_dicts(
         name : str, optional
             Name to give the Matrix.
 
+        See Also
+        --------
+        to_dicts
+
         Returns
         -------
         Matrix
@@ -1360,6 +1500,7 @@ def to_csr(self, dtype=None):
         to_coo
         to_csc
         to_dcsr
+        from_csr
         Matrix.ss.export
         io.to_scipy_sparse
         """
@@ -1385,6 +1526,7 @@ def to_csc(self, dtype=None):
         to_coo
         to_csr
         to_dcsc
+        from_csc
         Matrix.ss.export
         io.to_scipy_sparse
         """
@@ -1413,6 +1555,7 @@ def to_dcsr(self, dtype=None):
         to_coo
         to_csr
         to_dcsc
+        from_dcsr
         Matrix.ss.export
         io.to_scipy_sparse
         """
@@ -1459,6 +1602,7 @@ def to_dcsc(self, dtype=None):
         to_coo
         to_csc
         to_dcsr
+        from_dcsc
         Matrix.ss.export
         io.to_scipy_sparse
         """
@@ -1496,6 +1640,10 @@ def to_dicts(self, order="rowwise"):
             "columnwise" returns dict of dicts as ``{col: {row: val}}``.
             The default is "rowwise".
 
+        See Also
+        --------
+        from_dicts
+
         Returns
         -------
         dict
@@ -3102,6 +3250,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts):
     to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc))
     to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr))
     to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts))
+    to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist))
     to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values))
     wait = wrapdoc(Matrix.wait)(property(automethods.wait))
     # These raise exceptions
@@ -3200,6 +3349,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts):
     to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc))
     to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr))
     to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts))
+    to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist))
     to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values))
     wait = wrapdoc(Matrix.wait)(property(automethods.wait))
     # These raise exceptions
@@ -3365,6 +3515,7 @@ def to_dicts(self, order="rowwise"):
     get = Matrix.get
     isequal = Matrix.isequal
     isclose = Matrix.isclose
+    to_edgelist = Matrix.to_edgelist
     wait = Matrix.wait
     _extract_element = Matrix._extract_element
     _prep_for_extract = Matrix._prep_for_extract