diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index ea82052f8..6b36da3bc 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -142,7 +142,7 @@ jobs: # Consider removing old versions when they become problematic or very old (>=2 years). nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))') yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))') - sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", ""]))') + sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", "=0.14", ""]))') if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))') @@ -175,7 +175,7 @@ jobs: # That is, we don't need to support versions of it that are two years old. # But, it's still useful for us to test with different versions! if [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["=7.4.0", "=7.4.1", "=7.4.2", ""]))') + psgver=$(python -c 'import random ; print(random.choice(["=7.4.0", "=7.4.1", "=7.4.2", "=7.4.3.0", "=7.4.3.1", ""]))') else psgver="" fi @@ -187,7 +187,7 @@ jobs: echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psgver${psgver}" # Once we have wheels for all OSes, we can delete the last two lines. 
- mamba install pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \ + mamba install packaging pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \ pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} \ ${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \ ${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7"' || '' }} \ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index df37d4461..0c9c94988 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.249 + rev: v0.0.252 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/README.md b/README.md index 0b8318b3f..34c1c1994 100644 --- a/README.md +++ b/README.md @@ -186,11 +186,6 @@ Similar methods exist for BinaryOp, Monoid, and Semiring. ```python import graphblas as gb -# numpy arrays -# 1-D array becomes Vector, 2-D array becomes Matrix -A = gb.io.from_numpy(m) -m = gb.io.to_numpy(A) - # scipy.sparse matrices A = gb.io.from_scipy_sparse(m) m = gb.io.to_scipy_sparse(m, format='csr') @@ -198,4 +193,11 @@ m = gb.io.to_scipy_sparse(m, format='csr') # networkx graphs A = gb.io.from_networkx(g) g = gb.io.to_networkx(A) + +# numpy arrays can use `from_dense` and `to_dense` on Vector and Matrix +v = gb.Vector.from_dense(m) +m = v.to_dense() + +A = gb.Matrix.from_dense(m, missing_value=0) +m = A.to_dense(fill_value=0) ``` diff --git a/docs/user_guide/io.rst b/docs/user_guide/io.rst index 52211c465..9431ff413 100644 --- a/docs/user_guide/io.rst +++ b/docs/user_guide/io.rst @@ -14,7 +14,7 @@ Matrix and Vector, instead, have a ``.from_coo()`` and a ``.to_coo()`` method. 
``.from_coo()`` takes index(es) and values as either: - Python lists - - Numpy arrays + - NumPy arrays If no dtype is provided, the data type is inferred from the values. @@ -77,17 +77,17 @@ A python-graphblas Matrix can be created from a 2-D (PyData) sparse array or mat ``gb.io.to_pydata_sparse()`` will output a 2-D (PyData) sparse array given a python-graphblas Matrix. The sparse format can be specified. It defaults to "coo". -Numpy (Dense) +NumPy (Dense) ------------- While not useful for very large graphs, converting to and from small dense numpy arrays can be useful. -``gb.io.from_numpy()`` will convert a 1-D array into a Vector and a 2-D array into a Matrix. When converting -from numpy, zeros are treated as missing values. - -``gb.io.to_numpy()`` will convert a Vector or Matrix into the dense equivalent in numpy, filling missing -values with zero. +``Vector.from_dense()`` converts a 1-D array into a Vector and +``Matrix.from_dense()`` a 2-D array into a Matrix. When converting from NumPy, a value may be +chosen to become a missing value, such as ``Matrix.from_dense(a, missing_value=0)``. +``.to_dense()`` converts a Vector or Matrix into a NumPy array. If there are missing values, a fill +value should be given, such as ``.to_dense(fill_value=0)``.
SuiteSparse Export/Import ------------------------- diff --git a/environment.yml b/environment.yml index 90675527c..f327a6980 100644 --- a/environment.yml +++ b/environment.yml @@ -34,6 +34,7 @@ dependencies: # For linting - pre-commit # For testing + - packaging - pytest-cov # For debugging - icecream diff --git a/graphblas/core/operator.py b/graphblas/core/operator.py index eca7c9d75..bfd03d9df 100644 --- a/graphblas/core/operator.py +++ b/graphblas/core/operator.py @@ -941,8 +941,7 @@ def __contains__(self, type_): self[type_] except (TypeError, KeyError, numba.NumbaError): return False - else: - return True + return True @classmethod def _remove_nesting(cls, funcname, *, module=None, modname=None, strict=True): @@ -2740,9 +2739,9 @@ def _initialize(cls): cur_op._typed_ops[dtype] = bool_op # Builtin monoids that are idempotent; i.e., `op(x, x) == x` for any x - for name in {"any", "band", "bor", "land", "lor", "max", "min"}: + for name in ["any", "band", "bor", "land", "lor", "max", "min"]: getattr(monoid, name)._is_idempotent = True - for name in { + for name in [ "bitwise_and", "bitwise_or", "fmax", @@ -2752,7 +2751,7 @@ def _initialize(cls): "logical_or", "maximum", "minimum", - }: + ]: getattr(monoid.numpy, name)._is_idempotent = True # Allow some functions to work on UDTs diff --git a/graphblas/dtypes.py b/graphblas/dtypes.py index 2f8b40e43..22d98b8f1 100644 --- a/graphblas/dtypes.py +++ b/graphblas/dtypes.py @@ -44,8 +44,7 @@ def __lt__(self, other): t2 = lookup_dtype(other).np_type except ValueError: raise TypeError(f"Invalid or unknown datatype: {other}") from None - else: - return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name) + return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name) def __reduce__(self): if self._is_udt: diff --git a/graphblas/io.py b/graphblas/io.py index 125e69d25..e9d8ccfe6 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -550,14 +550,17 @@ def to_pydata_sparse(A, format="coo"): if format 
not in {"coo", "dok", "gcxs"}: raise ValueError(f"Invalid format: {format}") - if format == "gcxs": - B = to_scipy_sparse(A, format="csr") + if _output_type(A) is _Vector: + indices, values = A.to_coo(sort=False) + s = COO(indices, values, shape=A.shape) else: - # obtain an intermediate conversion via hardcoded 'coo' intermediate object - B = to_scipy_sparse(A, format="coo") - - # convert to pydata.sparse - s = COO.from_scipy_sparse(B) + if format == "gcxs": + B = to_scipy_sparse(A, format="csr") + else: + # obtain an intermediate conversion via hardcoded 'coo' intermediate object + B = to_scipy_sparse(A, format="coo") + # convert to pydata.sparse + s = COO.from_scipy_sparse(B) # express in the desired format return s.asformat(format) diff --git a/graphblas/tests/test_core.py b/graphblas/tests/test_core.py index b96dda352..c08ca416f 100644 --- a/graphblas/tests/test_core.py +++ b/graphblas/tests/test_core.py @@ -54,4 +54,6 @@ class bad: def test_version(): - assert gb.__version__ > "2022.11.0" + from packaging.version import parse + + assert parse(gb.__version__) > parse("2022.11.0") diff --git a/graphblas/tests/test_dtype.py b/graphblas/tests/test_dtype.py index 1ed0c777b..64e6d69ab 100644 --- a/graphblas/tests/test_dtype.py +++ b/graphblas/tests/test_dtype.py @@ -1,14 +1,19 @@ import itertools import pickle import string +import sys import numpy as np import pytest +import graphblas as gb from graphblas import dtypes from graphblas.core import lib from graphblas.dtypes import lookup_dtype +suitesparse = gb.backend == "suitesparse" +is_win = sys.platform.startswith("win") + all_dtypes = [ dtypes.BOOL, dtypes.INT8, @@ -233,3 +238,21 @@ def test_dtype_to_from_string(): lookup_dtype(dtype) else: assert dtype == dtype2 + + +def test_has_complex(): + """Only SuiteSparse has complex (with Windows support in Python after v7.4.3.1)""" + if not suitesparse: + assert not dtypes._supports_complex + return + if not is_win: + assert dtypes._supports_complex + return + + import 
suitesparse_graphblas as ssgb + from packaging.version import parse + + if parse(ssgb.__version__) < parse("7.4.3.1"): + assert not dtypes._supports_complex + else: + assert dtypes._supports_complex diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index f5bd4736f..6fa43ebbc 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -406,7 +406,8 @@ def test_vector_to_from_pydata_sparse(): assert v.isequal(gb.Vector.from_coo(coords, data, dtype=dtypes.INT64), check_dtype=True) t = gb.io.to_pydata_sparse(v) - assert t == s + assert t.shape == s.shape + assert (t == s).all() @pytest.mark.skipif("not sparse") @@ -418,7 +419,8 @@ def test_matrix_to_from_pydata_sparse(): assert v.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False) t = gb.io.to_pydata_sparse(v) - assert t == s + assert t.shape == s.shape + assert (t == s).all() # test ndim e = sparse.random(shape=(5, 5, 5), density=0) @@ -437,7 +439,8 @@ def test_matrix_to_from_pydata_sparse(): assert w.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False) r = gb.io.to_pydata_sparse(w, format="gcxs") - assert r == g + assert r.shape == g.shape + assert (r == g).all() with pytest.raises(ValueError, match="format"): gb.io.to_pydata_sparse(w, format="badformat") with pytest.raises(TypeError, match="sparse.pydata"): diff --git a/graphblas/viz.py b/graphblas/viz.py index 72e18361a..89010bc3d 100644 --- a/graphblas/viz.py +++ b/graphblas/viz.py @@ -182,7 +182,7 @@ def datashade(M, agg="count", *, width=None, height=None, opts_kwargs=None, **kw images.extend(image_row) return hv.Layout(images).cols(ncols) - kwds = dict( # noqa: C408 + kwds = dict( # noqa: C408 pylint: disable=use-dict-literal x="col", y="row", c="val", diff --git a/notebooks/Louvain.ipynb b/notebooks/Louvain.ipynb index f1b042a49..eb523ef9e 100644 --- a/notebooks/Louvain.ipynb +++ b/notebooks/Louvain.ipynb @@ -270,7 +270,7 @@ " [0, 0, 1, 0, 0, 1, 0],\n", " ]\n", ")\n", 
- "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { @@ -370,7 +370,7 @@ " [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", " ]\n", ")\n", - "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { @@ -469,7 +469,7 @@ " [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],\n", " ]\n", ")\n", - "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 083fd8656..adbf2d5b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ viz = [ ] test = [ "pytest", + "packaging", "pandas >=1.2", "scipy >=1.8", ] @@ -94,6 +95,7 @@ complete = [ "sparse >=0.12", "matplotlib >=3.5", "pytest", + "packaging", ] [tool.setuptools] @@ -147,6 +149,9 @@ filterwarnings = [ "error", # MAINT: we can drop support for sparse <0.13 at any time "ignore:`np.bool` is a deprecated alias:DeprecationWarning:sparse._umath", # sparse <0.13 + # sparse 0.14.0 (2023-02-24) began raising this warning; it has been reported and fixed upstream. + "ignore:coords should be an ndarray. This will raise a ValueError:DeprecationWarning:sparse._coo.core", + # setuptools v67.3.0 deprecated `pkg_resources.declare_namespace` on 13 Feb 2023. See: # https://setuptools.pypa.io/en/latest/history.html#v67-3-0 # MAINT: check if this is still necessary in 2025 @@ -271,6 +276,7 @@ ignore = [ "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable + "PLW2901", # Outer for loop variable ...
overwritten by inner assignment target (Note: good advice, but too strict) "RET502", # Do not implicitly `return None` in function able to return non-`None` value "RET503", # Missing explicit `return` at the end of function able to return non-`None` value "RET504", # Unnecessary variable assignment before `return` statement diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index ff7c88b32..d42952cf0 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -5,9 +5,10 @@ # Tip: add `--json` for more information. conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' -conda search 'scipy[channel=conda-forge]>=1.10.0' +conda search 'scipy[channel=conda-forge]>=1.10.1' conda search 'networkx[channel=conda-forge]>=3.0' conda search 'awkward[channel=conda-forge]>=2.0.8' +conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1'