
Commit

Remove use of from_numpy and misc updates/cleanup (python-graphblas…
eriknw authored Feb 27, 2023
1 parent 9a4808a commit 7be68cf
Showing 15 changed files with 78 additions and 39 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test_and_build.yml
@@ -142,7 +142,7 @@ jobs:
# Consider removing old versions when they become problematic or very old (>=2 years).
nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))')
yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))')
sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", ""]))')
sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", "=0.14", ""]))')
if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then
npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))')
@@ -175,7 +175,7 @@ jobs:
# That is, we don't need to support versions of it that are two years old.
# But, it's still useful for us to test with different versions!
if [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" ]] ; then
psgver=$(python -c 'import random ; print(random.choice(["=7.4.0", "=7.4.1", "=7.4.2", ""]))')
psgver=$(python -c 'import random ; print(random.choice(["=7.4.0", "=7.4.1", "=7.4.2", "=7.4.3.0", "=7.4.3.1", ""]))')
else
psgver=""
fi
@@ -187,7 +187,7 @@
echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psgver${psgver}"
# Once we have wheels for all OSes, we can delete the last two lines.
mamba install pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \
mamba install packaging pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \
pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} \
${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \
${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7"' || '' }} \
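For context, a minimal Python sketch of the random version-pin pattern this workflow step uses (package names and pins taken from the lines above; the real step runs such one-liners in bash via `$(python -c ...)`):

```python
import random

# "" means "no pin", i.e. install whatever version is currently available.
yamlver = random.choice(["=5.4", "=6.0", ""])
sparsever = random.choice(["=0.12", "=0.13", "=0.14", ""])

# The chosen pins are appended to package names in the `mamba install` line above.
print(f"mamba install pyyaml{yamlver} sparse{sparsever}")
```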
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -71,7 +71,7 @@ repos:
additional_dependencies: [tomli]
files: ^(graphblas|docs)/
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.249
rev: v0.0.252
hooks:
- id: ruff
- repo: https://github.com/sphinx-contrib/sphinx-lint
12 changes: 7 additions & 5 deletions README.md
@@ -186,16 +186,18 @@ Similar methods exist for BinaryOp, Monoid, and Semiring.
```python
import graphblas as gb

# numpy arrays
# 1-D array becomes Vector, 2-D array becomes Matrix
A = gb.io.from_numpy(m)
m = gb.io.to_numpy(A)

# scipy.sparse matrices
A = gb.io.from_scipy_sparse(m)
m = gb.io.to_scipy_sparse(m, format='csr')

# networkx graphs
A = gb.io.from_networkx(g)
g = gb.io.to_networkx(A)

# numpy arrays can use `from_dense` and `to_dense` on Vector and Matrix
v = gb.Vector.from_dense(m)
m = v.to_dense()

A = gb.Matrix.from_dense(m, missing_value=0)
m = A.to_dense(fill_value=0)
```
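For readers comparing the removed `gb.io.from_numpy`/`to_numpy` calls with the new methods, a minimal round-trip sketch (the 2×2 array is illustrative):

```python
import numpy as np
import graphblas as gb

m = np.array([[0, 2],
              [3, 0]])

# Treat zeros as missing values when building the Matrix.
A = gb.Matrix.from_dense(m, missing_value=0)   # 2 stored values
assert A.nvals == 2

# Fill the missing values back in to get a dense numpy array again.
assert (A.to_dense(fill_value=0) == m).all()
```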
14 changes: 7 additions & 7 deletions docs/user_guide/io.rst
@@ -14,7 +14,7 @@ Matrix and Vector, instead, have a ``.from_coo()`` and a ``.to_coo()`` method.
``.from_coo()`` takes index(es) and values as either:

- Python lists
- Numpy arrays
- NumPy arrays

If no dtype is provided, the data type is inferred from the values.
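A short sketch of `.from_coo()` with plain Python lists (the indexes and values here are illustrative):

```python
import graphblas as gb

# Vector: indices and values; the dtype is inferred (INT64 here).
v = gb.Vector.from_coo([0, 2, 4], [10, 20, 30], size=5)

# Matrix: row indices, column indices, and values.
A = gb.Matrix.from_coo([0, 1, 1], [1, 0, 1], [1.5, 2.5, 3.5], nrows=2, ncols=2)
```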

@@ -77,17 +77,17 @@ A python-graphblas Matrix can be created from a 2-D (PyData) sparse array or mat
``gb.io.to_pydata_sparse()`` will output a 2-D (PyData) sparse array given a python-graphblas Matrix.
The sparse format can be specified. It defaults to "coo".

Numpy (Dense)
NumPy (Dense)
-------------

While not useful for very large graphs, converting to and from small dense numpy arrays can be useful.

``gb.io.from_numpy()`` will convert a 1-D array into a Vector and a 2-D array into a Matrix. When converting
from numpy, zeros are treated as missing values.

``gb.io.to_numpy()`` will convert a Vector or Matrix into the dense equivalent in numpy, filling missing
values with zero.
``Vector.from_dense()`` converts a 1-D array into a Vector and
``Matrix.from_dense()`` a 2-D array into a Matrix. When converting from numpy, a value may be
chosen to become a missing value, such as ``Matrix.from_dense(a, missing_value=0)``.

``.to_dense()`` converts a Vector or Matrix into a numpy array. If there are missing values, a fill
value should be given such as ``.to_dense(fill_value=0)``.

SuiteSparse Export/Import
-------------------------
1 change: 1 addition & 0 deletions environment.yml
@@ -34,6 +34,7 @@ dependencies:
# For linting
- pre-commit
# For testing
- packaging
- pytest-cov
# For debugging
- icecream
9 changes: 4 additions & 5 deletions graphblas/core/operator.py
@@ -941,8 +941,7 @@ def __contains__(self, type_):
self[type_]
except (TypeError, KeyError, numba.NumbaError):
return False
else:
return True
return True

@classmethod
def _remove_nesting(cls, funcname, *, module=None, modname=None, strict=True):
@@ -2740,9 +2739,9 @@ def _initialize(cls):
cur_op._typed_ops[dtype] = bool_op

# Builtin monoids that are idempotent; i.e., `op(x, x) == x` for any x
for name in {"any", "band", "bor", "land", "lor", "max", "min"}:
for name in ["any", "band", "bor", "land", "lor", "max", "min"]:
getattr(monoid, name)._is_idempotent = True
for name in {
for name in [
"bitwise_and",
"bitwise_or",
"fmax",
@@ -2752,7 +2751,7 @@
"logical_or",
"maximum",
"minimum",
}:
]:
getattr(monoid.numpy, name)._is_idempotent = True

# Allow some functions to work on UDTs
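As a quick illustration of the idempotent property noted in the comment above (`op(x, x) == x`), using NumPy ufuncs that correspond to some of the listed monoids rather than the graphblas operators themselves:

```python
import numpy as np

x = np.array([3, -1, 7])

# For an idempotent binary op, combining an element with itself returns it unchanged.
assert (np.maximum(x, x) == x).all()      # "maximum" / max monoid
assert (np.bitwise_and(x, x) == x).all()  # "bitwise_and" / band monoid
```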
3 changes: 1 addition & 2 deletions graphblas/dtypes.py
@@ -44,8 +44,7 @@ def __lt__(self, other):
t2 = lookup_dtype(other).np_type
except ValueError:
raise TypeError(f"Invalid or unknown datatype: {other}") from None
else:
return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name)
return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name)

def __reduce__(self):
if self._is_udt:
17 changes: 10 additions & 7 deletions graphblas/io.py
@@ -550,14 +550,17 @@ def to_pydata_sparse(A, format="coo"):
if format not in {"coo", "dok", "gcxs"}:
raise ValueError(f"Invalid format: {format}")

if format == "gcxs":
B = to_scipy_sparse(A, format="csr")
if _output_type(A) is _Vector:
indices, values = A.to_coo(sort=False)
s = COO(indices, values, shape=A.shape)
else:
# obtain an intermediate conversion via hardcoded 'coo' intermediate object
B = to_scipy_sparse(A, format="coo")

# convert to pydata.sparse
s = COO.from_scipy_sparse(B)
if format == "gcxs":
B = to_scipy_sparse(A, format="csr")
else:
# obtain an intermediate conversion via hardcoded 'coo' intermediate object
B = to_scipy_sparse(A, format="coo")
# convert to pydata.sparse
s = COO.from_scipy_sparse(B)

# express in the desired format
return s.asformat(format)
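A rough usage sketch of the updated conversion (values are illustrative): a Vector is now turned into a 1-D `sparse.COO` directly from its COO data, while a Matrix still goes through a scipy.sparse intermediate.

```python
import numpy as np
import graphblas as gb

v = gb.Vector.from_coo([0, 3], [1.5, 2.5], size=5)
s = gb.io.to_pydata_sparse(v)                  # 1-D COO, default format "coo"
assert s.shape == (5,)
assert (s.todense() == np.array([1.5, 0.0, 0.0, 2.5, 0.0])).all()

A = gb.Matrix.from_coo([0, 1], [1, 0], [1, 2], nrows=2, ncols=2)
g = gb.io.to_pydata_sparse(A, format="gcxs")   # converted via scipy CSR, then GCXS
assert g.shape == (2, 2)
```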
4 changes: 3 additions & 1 deletion graphblas/tests/test_core.py
@@ -54,4 +54,6 @@ class bad:


def test_version():
assert gb.__version__ > "2022.11.0"
from packaging.version import parse

assert parse(gb.__version__) > parse("2022.11.0")
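For context, a tiny sketch of why the comparison now goes through `packaging.version.parse` (version strings here are illustrative): CalVer strings do not compare correctly as plain strings once a component gains a digit.

```python
from packaging.version import parse

# String comparison is lexicographic: "1" < "9", so "2022.10.0" sorts before "2022.9.0".
assert "2022.10.0" < "2022.9.0"
# Parsed versions compare numerically, component by component.
assert parse("2022.10.0") > parse("2022.9.0")
```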
23 changes: 23 additions & 0 deletions graphblas/tests/test_dtype.py
@@ -1,14 +1,19 @@
import itertools
import pickle
import string
import sys

import numpy as np
import pytest

import graphblas as gb
from graphblas import dtypes
from graphblas.core import lib
from graphblas.dtypes import lookup_dtype

suitesparse = gb.backend == "suitesparse"
is_win = sys.platform.startswith("win")

all_dtypes = [
dtypes.BOOL,
dtypes.INT8,
@@ -233,3 +238,21 @@ def test_dtype_to_from_string():
lookup_dtype(dtype)
else:
assert dtype == dtype2


def test_has_complex():
"""Only SuiteSparse has complex (with Windows support in Python after v7.4.3.1)"""
if not suitesparse:
assert not dtypes._supports_complex
return
if not is_win:
assert dtypes._supports_complex
return

import suitesparse_graphblas as ssgb
from packaging.version import parse

if parse(ssgb.__version__) < parse("7.4.3.1"):
assert not dtypes._supports_complex
else:
assert dtypes._supports_complex
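As a hedged sketch of how the flag exercised by this test might be used (the guard and the `FC64` complex dtype are illustrative, not part of this change):

```python
import graphblas as gb
from graphblas import dtypes

# Complex dtypes are only available on the SuiteSparse backend, and on Windows
# only with a sufficiently new suitesparse-graphblas (7.4.3.1 or newer per the
# test above), so guard before using them.
if dtypes._supports_complex:
    v = gb.Vector.from_coo([0, 2], [1 + 2j, 3 - 1j], dtype=dtypes.FC64, size=3)
```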
9 changes: 6 additions & 3 deletions graphblas/tests/test_io.py
@@ -406,7 +406,8 @@ def test_vector_to_from_pydata_sparse():
assert v.isequal(gb.Vector.from_coo(coords, data, dtype=dtypes.INT64), check_dtype=True)

t = gb.io.to_pydata_sparse(v)
assert t == s
assert t.shape == s.shape
assert (t == s).all()
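For context, a small sketch (with a toy array) of why the assertion changed: comparing PyData sparse arrays with `==` is element-wise and returns an array rather than a single boolean, so the test now checks the shape and reduces with `.all()`.

```python
import numpy as np
import sparse

a = sparse.COO.from_numpy(np.array([0, 1, 0, 2]))
b = sparse.COO.from_numpy(np.array([0, 1, 0, 2]))

eq = a == b              # element-wise -> sparse boolean array, not True/False
assert eq.shape == a.shape
assert eq.all()          # reduce to a single truth value
```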


@pytest.mark.skipif("not sparse")
@@ -418,7 +419,8 @@ def test_matrix_to_from_pydata_sparse():
assert v.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False)

t = gb.io.to_pydata_sparse(v)
assert t == s
assert t.shape == s.shape
assert (t == s).all()

# test ndim
e = sparse.random(shape=(5, 5, 5), density=0)
@@ -437,7 +439,8 @@ def test_matrix_to_from_pydata_sparse():
assert w.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False)

r = gb.io.to_pydata_sparse(w, format="gcxs")
assert r == g
assert r.shape == g.shape
assert (r == g).all()
with pytest.raises(ValueError, match="format"):
gb.io.to_pydata_sparse(w, format="badformat")
with pytest.raises(TypeError, match="sparse.pydata"):
2 changes: 1 addition & 1 deletion graphblas/viz.py
@@ -182,7 +182,7 @@ def datashade(M, agg="count", *, width=None, height=None, opts_kwargs=None, **kw
images.extend(image_row)
return hv.Layout(images).cols(ncols)

kwds = dict( # noqa: C408
kwds = dict( # noqa: C408 pylint: disable=use-dict-literal
x="col",
y="row",
c="val",
6 changes: 3 additions & 3 deletions notebooks/Louvain.ipynb
@@ -270,7 +270,7 @@
" [0, 0, 1, 0, 0, 1, 0],\n",
" ]\n",
")\n",
"g = gb.io.from_numpy(m)"
"g = Matrix.from_dense(m, missing_value=0)"
]
},
{
@@ -370,7 +370,7 @@
" [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" ]\n",
")\n",
"g = gb.io.from_numpy(m)"
"g = Matrix.from_dense(m, missing_value=0)"
]
},
{
@@ -469,7 +469,7 @@
" [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],\n",
" ]\n",
")\n",
"g = gb.io.from_numpy(m)"
"g = Matrix.from_dense(m, missing_value=0)"
]
},
{
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -83,6 +83,7 @@ viz = [
]
test = [
"pytest",
"packaging",
"pandas >=1.2",
"scipy >=1.8",
]
@@ -94,6 +95,7 @@ complete = [
"sparse >=0.12",
"matplotlib >=3.5",
"pytest",
"packaging",
]

[tool.setuptools]
@@ -147,6 +149,9 @@ filterwarnings = [
"error",
# MAINT: we can drop support for sparse <0.13 at any time
"ignore:`np.bool` is a deprecated alias:DeprecationWarning:sparse._umath", # sparse <0.13
# sparse 0.14.0 (2023-02-24) began raising this warning; it has been reported and fixed upstream.
"ignore:coords should be an ndarray. This will raise a ValueError:DeprecationWarning:sparse._coo.core",

# setuptools v67.3.0 deprecated `pkg_resources.declare_namespace` on 13 Feb 2023. See:
# https://setuptools.pypa.io/en/latest/history.html#v67-3-0
# MAINT: check if this is still necessary in 2025
@@ -271,6 +276,7 @@ ignore = [
"PLR0913", # Too many arguments to function call
"PLR0915", # Too many statements
"PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable
"PLW2901", # Outer for loop variable ... overwritten by inner assignment target (Note: good advice, but too strict)
"RET502", # Do not implicitly `return None` in function able to return non-`None` value
"RET503", # Missing explicit `return` at the end of function able to return non-`None` value
"RET504", # Unnecessary variable assignment before `return` statement
3 changes: 2 additions & 1 deletion scripts/check_versions.sh
@@ -5,9 +5,10 @@
# Tip: add `--json` for more information.
conda search 'numpy[channel=conda-forge]>=1.24.2'
conda search 'pandas[channel=conda-forge]>=1.5.3'
conda search 'scipy[channel=conda-forge]>=1.10.0'
conda search 'scipy[channel=conda-forge]>=1.10.1'
conda search 'networkx[channel=conda-forge]>=3.0'
conda search 'awkward[channel=conda-forge]>=2.0.8'
conda search 'sparse[channel=conda-forge]>=0.14.0'
conda search 'numba[channel=conda-forge]>=0.56.4'
conda search 'pyyaml[channel=conda-forge]>=6.0'
conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1'
