Skip to content

Commit

Permalink
Add conversions to/from pydata/sparse to graphblas.io (python-graphbl…
Browse files Browse the repository at this point in the history
…as#347)

Add conversions to and from `sparse >=0.12` (a.k.a. PyData/sparse)
  • Loading branch information
SultanOrazbayev authored Jan 23, 2023
1 parent f66d8c8 commit c54a274
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 9 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ jobs:
# Consider removing old versions when they become problematic or very old (>=2 years).
nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))')
yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))')
sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", ""]))')
if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then
npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))')
Expand Down Expand Up @@ -183,10 +184,10 @@ jobs:
else
numbaver=$(python -c 'import random ; print(random.choice(["=0.56", ""]))')
fi
echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} psgver${psgver}"
echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psgver${psgver}"
# Once we have wheels for all OSes, we can delete the last two lines.
mamba install pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} \
mamba install pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \
pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} \
${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \
${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7"' || '' }} \
Expand Down Expand Up @@ -233,7 +234,7 @@ jobs:
if [[ $H && $normal ]] ; then if [[ $macos ]] ; then echo " $vanilla" ; elif [[ $windows ]] ; then echo " $suitesparse" ; fi ; fi)$( \
if [[ $H && $bizarro ]] ; then if [[ $macos ]] ; then echo " $suitesparse" ; elif [[ $windows ]] ; then echo " $vanilla" ; fi ; fi)
echo $args
coverage run -m pytest --color=yes -Werror --randomly -v $args \
coverage run -m pytest --color=yes --randomly -v $args \
${{ matrix.slowtask == 'pytest_normal' && '--runslow' || '' }}
- name: Unit tests (bizarro scalars)
run: |
Expand Down Expand Up @@ -268,7 +269,7 @@ jobs:
if [[ $H && $normal ]] ; then if [[ $macos ]] ; then echo " $suitesparse" ; elif [[ $windows ]] ; then echo " $vanilla" ; fi ; fi)$( \
if [[ $H && $bizarro ]] ; then if [[ $macos ]] ; then echo " $vanilla" ; elif [[ $windows ]] ; then echo " $suitesparse" ; fi ; fi)
echo $args
coverage run -a -m pytest --color=yes -Werror --randomly -v $args \
coverage run -a -m pytest --color=yes --randomly -v $args \
${{ matrix.slowtask == 'pytest_bizarro' && '--runslow' || '' }}
git checkout . # Undo changes to scalar default
- name: Miscellaneous tests
Expand All @@ -289,8 +290,8 @@ jobs:
rm script.py
# Tests whose coverage depend on order of tests :/
# TODO: understand why these are order-dependent and try to fix
coverage run -a -m pytest --color=yes -Werror -x --no-mapnumpy -k test_binaryop_attributes_numpy graphblas/tests/test_op.py
# coverage run -a -m pytest --color=yes -Werror -x --no-mapnumpy -k test_npmonoid graphblas/tests/test_numpyops.py --runslow
coverage run -a -m pytest --color=yes -x --no-mapnumpy -k test_binaryop_attributes_numpy graphblas/tests/test_op.py
# coverage run -a -m pytest --color=yes -x --no-mapnumpy -k test_npmonoid graphblas/tests/test_numpyops.py --runslow
- name: Auto-generated code check
if: matrix.slowtask == 'pytest_bizarro'
run: |
Expand Down
9 changes: 9 additions & 0 deletions docs/api_reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,15 @@ These methods require `scipy <https://scipy.org/>`_ to be installed.

.. autofunction:: graphblas.io.to_scipy_sparse

PyData Sparse
~~~~~~~~~~~~~

These methods require `sparse <https://sparse.pydata.org/>`_ to be installed.

.. autofunction:: graphblas.io.from_pydata_sparse

.. autofunction:: graphblas.io.to_pydata_sparse

Matrix Market
~~~~~~~~~~~~~

Expand Down
8 changes: 8 additions & 0 deletions docs/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ that drop zero-weighted edges. The conversion from python-graphblas to scipy.spa
zero-weighted edges, but the user should be aware of the potential for errors occurring when zero-weighted
edges are handled by scipy.sparse.

PyData.Sparse
-------------

A python-graphblas Matrix can be created from a 2-D (PyData) sparse array or matrix using
``gb.io.from_pydata_sparse()``. A 1-D (PyData) sparse array is converted to a Vector instead.

``gb.io.to_pydata_sparse()`` will output a 2-D (PyData) sparse array given a python-graphblas Matrix.
The sparse format can be specified as "coo", "dok", or "gcxs". It defaults to "coo".

Numpy (Dense)
-------------
Expand Down
91 changes: 91 additions & 0 deletions graphblas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,60 @@ def from_awkward(A, *, name=None):
raise ValueError(f"Invalid format for Matrix: {format}")


def from_pydata_sparse(s, *, dup_op=None, name=None):
    """Create a Vector or a Matrix from a pydata.sparse array or matrix.

    Input data in "gcxs" format will be efficient when importing with SuiteSparse:GraphBLAS.

    Parameters
    ----------
    s : sparse
        PyData sparse array or matrix (see https://sparse.pydata.org)
    dup_op : BinaryOp, optional
        Aggregation function for formats that allow duplicate entries (e.g. coo)
    name : str, optional
        Name of resulting Vector or Matrix

    Returns
    -------
    :class:`~graphblas.Vector`
        When ``s`` is 1-dimensional.
    :class:`~graphblas.Matrix`
        When ``s`` is 2-dimensional.

    Raises
    ------
    ImportError
        If the ``sparse`` package is not installed.
    TypeError
        If ``s`` is not a ``sparse.SparseArray``.
    GraphblasException
        If ``s`` is neither 1- nor 2-dimensional.
    ValueError
        If ``s`` is a 2-D sparse array of a class other than GCXS, DOK, or COO.

    """
    try:
        import sparse
    except ImportError:  # pragma: no cover (import)
        raise ImportError("sparse is required to import from pydata sparse") from None
    if not isinstance(s, sparse.SparseArray):
        raise TypeError(
            "from_pydata_sparse only accepts objects from the `sparse` library; "
            "see https://sparse.pydata.org"
        )
    # Reject 0-d input here too; otherwise it would reach the 2-D branch and fail on
    # `shape[0]` with an error that says nothing about dimensionality.
    if s.ndim not in {1, 2}:
        raise _GraphblasException("s.ndim must be 1 or 2")

    if s.ndim == 1:
        # the .asformat('coo') makes it easier to convert dok/gcxs using a single approach
        _s = s.asformat("coo")
        # `coords` has one row per dimension; unpack it (as the 2-D branch does) so that
        # the single row of indices is what gets passed, not the enclosing 2-D array.
        return _Vector.from_coo(
            *_s.coords, _s.data, dtype=_s.dtype, size=_s.shape[0], dup_op=dup_op, name=name
        )
    # handle two-dimensional arrays
    if isinstance(s, sparse.GCXS):
        # Compressed input is handed over through its scipy.sparse representation.
        return from_scipy_sparse(s.to_scipy_sparse(), dup_op=dup_op, name=name)
    if isinstance(s, (sparse.DOK, sparse.COO)):
        _s = s.asformat("coo")
        return _Matrix.from_coo(
            *_s.coords,
            _s.data,
            nrows=_s.shape[0],
            ncols=_s.shape[1],
            dtype=_s.dtype,
            dup_op=dup_op,
            name=name,
        )
    raise ValueError(f"Unknown sparse array type: {type(s).__name__}")  # pragma: no cover (safety)


# TODO: add parameters to allow different networkx classes and attribute names
def to_networkx(m, edge_attribute="weight"):
"""Create a networkx DiGraph from a square adjacency Matrix.
Expand Down Expand Up @@ -482,6 +536,43 @@ def indices(self):
return ret


def to_pydata_sparse(A, format="coo"):
    """Create a pydata.sparse array from a GraphBLAS Matrix or Vector.

    Parameters
    ----------
    A : Matrix or Vector
        GraphBLAS object to be converted
    format : str
        {'coo', 'dok', 'gcxs'} (case-insensitive)

    Returns
    -------
    sparse array (see https://sparse.pydata.org)
        A 1-D array when ``A`` is a Vector; otherwise the array obtained from the
        scipy.sparse export of ``A``.

    Raises
    ------
    ImportError
        If the ``sparse`` package is not installed.
    ValueError
        If ``format`` is not one of the supported formats.

    """
    try:
        from sparse import COO
    except ImportError:  # pragma: no cover (import)
        raise ImportError("sparse is required to export to pydata sparse") from None

    format = format.lower()
    if format not in {"coo", "dok", "gcxs"}:
        raise ValueError(f"Invalid format: {format}")

    if isinstance(A, _Vector):
        # Build the 1-D array directly from the Vector's indices and values so that the
        # result round-trips with `from_pydata_sparse`.
        # NOTE(review): assumes the scipy.sparse route below yields a 2-D container for a
        # Vector, which is why it is bypassed here -- confirm against `to_scipy_sparse`.
        # Only concrete Vector instances take this branch; anything else keeps the
        # scipy-based path unchanged.
        indices, values = A.to_coo()
        s = COO(indices, values, shape=(A.size,))
    else:
        if format == "gcxs":
            B = to_scipy_sparse(A, format="csr")
        else:
            # obtain an intermediate conversion via hardcoded 'coo' intermediate object
            B = to_scipy_sparse(A, format="coo")

        # convert to pydata.sparse
        s = COO.from_scipy_sparse(B)

    # express in the desired format
    return s.asformat(format)


def mmread(source, *, dup_op=None, name=None):
"""Create a GraphBLAS Matrix from the contents of a Matrix Market file.
Expand Down
56 changes: 55 additions & 1 deletion graphblas/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,23 @@

import graphblas as gb
from graphblas import Matrix, dtypes
from graphblas.exceptions import GraphblasException

try:
import networkx as nx
except ImportError: # pragma: no cover (import)
nx = None

try:
import scipy.sparse as ss
except ImportError: # pragma: no cover (import)
ss = None

try:
import sparse
except ImportError: # pragma: no cover (import)
sparse = None

try:
import awkward._v2 as ak
except ImportError:
Expand Down Expand Up @@ -89,7 +96,7 @@ def test_matrix_to_from_numpy():
with pytest.raises(ValueError, match="Invalid format"):
gb.io.to_scipy_sparse(M, "bad format")

with pytest.raises(gb.exceptions.GraphblasException, match="ndim must be"):
with pytest.raises(GraphblasException, match="ndim must be"):
gb.io.from_numpy(np.array([[[1.0, 0.0], [2.0, 3.7]]]))


Expand Down Expand Up @@ -386,3 +393,50 @@ def test_awkward_errors():
gb.io.to_awkward(m, format="dcsr")
with pytest.raises(TypeError):
gb.io.to_awkward(gb.Scalar.from_value(5))


@pytest.mark.skipif("not sparse")
def test_vector_to_from_pydata_sparse():
    """Round-trip a 1-D PyData COO array through a GraphBLAS Vector."""
    coords = np.array([0, 1, 2, 3, 4], dtype="int64")
    data = np.array([10, 20, 30, 40, 50], dtype="int64")
    s = sparse.COO(coords, data, shape=(5,))
    v = gb.io.from_pydata_sparse(s)
    assert v.isequal(gb.Vector.from_coo(coords, data, dtype=dtypes.INT64), check_dtype=True)

    t = gb.io.to_pydata_sparse(v)
    assert isinstance(t, sparse.COO)
    # `t == s` compares elementwise and produces an array, so asserting on it directly
    # does not verify equality; compare the dense values instead.  The exported array is
    # flattened first so this check holds whether the exporter returns shape (5,) or (5, 1).
    assert np.array_equal(t.todense().ravel(), s.todense())


@pytest.mark.skipif("not sparse")
def test_matrix_to_from_pydata_sparse():
    """Round-trip 2-D PyData COO and GCXS arrays through a GraphBLAS Matrix."""
    coords = np.array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], dtype="int64")
    data = np.array([10, 20, 30, 40, 50], dtype="int64")
    s = sparse.COO(coords, data, shape=(5, 5))
    A = gb.io.from_pydata_sparse(s)
    assert A.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False)

    t = gb.io.to_pydata_sparse(A)
    # `t == s` compares elementwise and produces an array, so asserting on it directly
    # does not verify equality; check type, shape, and dense values explicitly.
    assert isinstance(t, sparse.COO)
    assert t.shape == s.shape
    assert np.array_equal(t.todense(), s.todense())

    # test ndim
    e = sparse.random(shape=(5, 5, 5), density=0)
    with pytest.raises(GraphblasException, match="ndim must be"):
        gb.io.from_pydata_sparse(e)

    # test GCXS array conversion
    indptr = np.array([0, 2, 3, 6], dtype="int64")
    indices = np.array([0, 2, 2, 0, 1, 2], dtype="int64")
    data = np.array([1, 2, 3, 4, 5, 6], dtype="int64")

    g = sparse.GCXS((data, indices, indptr), shape=(3, 3), compressed_axes=[0])
    w = gb.io.from_pydata_sparse(g)
    g_coo = g.asformat("coo")
    assert w.isequal(
        gb.Matrix.from_coo(*g_coo.coords, g_coo.data, dtype=dtypes.INT64), check_dtype=False
    )

    r = gb.io.to_pydata_sparse(w, format="gcxs")
    assert isinstance(r, sparse.GCXS)
    assert r.shape == g.shape
    assert np.array_equal(r.todense(), g.todense())

    with pytest.raises(ValueError, match="format"):
        gb.io.to_pydata_sparse(w, format="badformat")
    # A GraphBLAS Matrix is not a PyData sparse array, so importing it must be rejected.
    with pytest.raises(TypeError, match="sparse.pydata"):
        gb.io.from_pydata_sparse(w)
12 changes: 10 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,10 @@ io = [
"networkx >=2.8",
"scipy >=1.8",
"awkward >=1.9",
"sparse >=0.12",
]
viz = [
"matplotlib",
"matplotlib >=3.5",
]
test = [
"pytest",
Expand All @@ -83,7 +84,8 @@ complete = [
"networkx >=2.8",
"scipy >=1.8",
"awkward >=1.9",
"matplotlib",
"sparse >=0.12",
"matplotlib >=3.5",
"pytest",
]

Expand Down Expand Up @@ -132,6 +134,12 @@ xfail_strict = true
markers = [
"slow: Skipped unless --runslow passed",
]
filterwarnings = [
# See: https://docs.python.org/3/library/warnings.html#describing-warning-filters
# and: https://docs.pytest.org/en/7.2.x/how-to/capture-warnings.html#controlling-warnings
"error",
"ignore:`np.bool` is a deprecated alias:DeprecationWarning:sparse._umath", # sparse <0.13
]

[tool.coverage.run]
branch = true
Expand Down

0 comments on commit c54a274

Please sign in to comment.