Skip to content

Commit

Permalink
Connection to Awkward Arrays (python-graphblas#280)
Browse files Browse the repository at this point in the history
* Add io.to_awkward method

Converts a Vector or Matrix to an Awkward Array.
While an Awkward Array elegantly mimics pointers and values, it does
not naturally include the indices of a CSR matrix. However, Awkward
Arrays handle JSON-like structures of arrays, so we can easily add
values, indices, and even labels (in the case of hypersparse),
allowing a proper round trip journey from GraphBLAS to Awkward Arrays.

Awkward does not support iso-valued objects, so iso values will be expanded
into a full array of values that carries equivalent information.

Awkward Array also does not currently support u64 for indexes, so we
are using i64. This should be fine as SuiteSparse:GraphBLAS only allows
indices up to 2**60, which means they fit in int64 even though GraphBLAS
internally stores them as uint64.
  • Loading branch information
jim22k authored Oct 3, 2022
1 parent 2ea41e6 commit ef0cdcc
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
black . --check --diff
- name: Build extension module
run: |
conda install -c conda-forge pandas numba scipy networkx cffi donfig pyyaml
conda install -c conda-forge pandas numba scipy networkx cffi donfig pyyaml awkward
if [[ ${{ matrix.cfg.sourcetype }} == "wheel" ]]; then
pip install suitesparse-graphblas
else
Expand Down
175 changes: 175 additions & 0 deletions graphblas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,62 @@ def from_scipy_sparse(A, *, dup_op=None, name=None):
)


def from_awkward(A, *, name=None):
    """Create a Matrix or Vector from an Awkward Array.

    The Awkward Array must have top-level parameters: format, shape

    The Awkward Array must have top-level attributes based on format:
    - vec/csr/csc: values, indices
    - hypercsr/hypercsc: values, indices, offset_labels

    Parameters
    ----------
    A : awkward.Array
        Awkward Array with values and indices
    name : str, optional
        Name of resulting Matrix or Vector

    Returns
    -------
    Vector or Matrix

    Raises
    ------
    ValueError
        If the "format" or "shape" parameter is missing, if "shape" does not
        have exactly one or two dimensions, or if "format" is not valid for
        the dimensionality given by "shape".
    """
    params = A.layout.parameters
    missing = {"format", "shape"} - params.keys()
    if missing:
        raise ValueError(f"Missing parameters: {missing}")
    # Use `fmt` rather than `format` to avoid shadowing the builtin.
    fmt = params["format"]
    shape = params["shape"]
    ndim = len(shape)

    if ndim == 1:
        if fmt != "vec":
            raise ValueError(f"Invalid format for Vector: {fmt}")
        return _Vector.from_values(
            A.indices.layout.data, A.values.layout.data, size=shape[0], name=name
        )

    # Anything that is not 1-d must be exactly 2-d; reject other ranks up front
    # instead of failing with IndexError or silently ignoring extra dimensions.
    if ndim != 2:
        raise ValueError(f"Invalid shape for Vector or Matrix: {shape}")
    if fmt not in {"csr", "csc", "hypercsr", "hypercsc"}:
        raise ValueError(f"Invalid format for Matrix: {fmt}")

    nrows, ncols = shape
    # Formats ending in "r" (csr, hypercsr) carry column indices and row labels;
    # formats ending in "c" (csc, hypercsc) carry row indices and column labels.
    if fmt.endswith("r"):
        indices, labels = "col", "rows"
    else:
        indices, labels = "row", "cols"

    d = {
        "format": fmt,
        "nrows": nrows,
        "ncols": ncols,
        "values": A.values.layout.content.data,
        "indptr": A.values.layout.offsets.data,
        f"{indices}_indices": A.indices.layout.content.data,
        # to_awkward exports with sort=True, so indices are declared sorted.
        # NOTE(review): arrays built by other producers are assumed sorted too.
        f"sorted_{indices}s": True,
    }
    if fmt.startswith("hyper"):
        # NOTE(review): top-level `values`/`indices` on hyper arrays appear to
        # rely on the mixin class that to_awkward registers -- confirm this
        # also works for arrays that were not produced in this process.
        d[labels] = A.offset_labels.layout.data
    return _Matrix.ss.import_any(**d, name=name)


# TODO: add parameters to allow different networkx classes and attribute names
def to_networkx(m):
"""Create a networkx DiGraph from a square adjacency Matrix
Expand Down Expand Up @@ -319,6 +375,125 @@ def to_scipy_sparse(A, format="csr"):
return rv.asformat(format)


# Placeholder for the Awkward mixin class used by hypersparse results. It stays
# None until to_awkward is first called with a "hyper*" format, at which point
# the class is defined and bound to this module-level name.
_AwkwardDoublyCompressedMatrix = None


def to_awkward(A, format=None):
    """Create an Awkward Array from a GraphBLAS Matrix or Vector.

    Parameters
    ----------
    A : Matrix or Vector
        GraphBLAS object to be converted
    format : str {'csr', 'csc', 'hypercsr', 'hypercsc', 'vec'}
        Default format is csr for Matrix; vec for Vector.
        The value is lower-cased before it is validated.

    The Awkward Array will have top-level attributes based on format:
    - vec/csr/csc: values, indices
    - hypercsr/hypercsc: values, indices, offset_labels

    Top-level parameters will also be set: format, shape

    Returns
    -------
    awkward.Array

    Raises
    ------
    ValueError
        If ``format`` is not valid for the type of ``A``.
    NotImplementedError
        If ``A`` is a Matrix and the backend is not "suitesparse".
    TypeError
        If ``A`` is neither a Matrix nor a Vector.
    """
    # Imported inside the function so awkward is only required when it is called.
    import awkward._v2 as ak
    from awkward._v2.forms.listoffsetform import ListOffsetForm
    from awkward._v2.forms.numpyform import NumpyForm
    from awkward._v2.forms.recordform import RecordForm

    out_type = _output_type(A)
    if format is None:
        format = "vec" if out_type is _Vector else "csr"
    format = format.lower()
    # Only set for hypersparse output, where the result is tagged with a record name.
    classname = None

    if out_type is _Vector:
        if format != "vec":
            raise ValueError(f"Invalid format for Vector: {format}")
        # Length of the resulting array: one record per stored element.
        size = A.nvals
        indices, values = A.to_values()
        form = RecordForm(
            contents=[
                NumpyForm(A.dtype.numba_type.name, form_key="node1"),
                NumpyForm("int64", form_key="node0"),
            ],
            fields=["values", "indices"],
        )
        # Buffer keys pair with the form_key values above ("<form_key>-data").
        d = {"node0-data": indices, "node1-data": values}

    elif out_type is _Matrix:
        if _backend != "suitesparse":
            raise NotImplementedError(
                f"Conversion of Matrix to Awkward Array not supported for backend '{_backend}'"
            )
        if format not in {"csr", "csc", "hypercsr", "hypercsc"}:
            raise ValueError(f"Invalid format for Matrix: {format}")
        # Formats ending in "r" are row-oriented: one list per row holding
        # column indices. Otherwise one list per column holding row indices.
        if format[-1] == "r":
            size = A.nrows
            indices = "col_indices"
            labels = "rows"
        else:
            size = A.ncols
            indices = "row_indices"
            labels = "cols"
        info = A.ss.export(format, sort=True)
        if info["is_iso"]:
            # Iso-valued exports are expanded to one value per stored element
            # before being handed to Awkward.
            info["values"] = _np.ascontiguousarray(_np.broadcast_to(info["values"], A.nvals))
        # A list-of-records layout: "indptr" supplies the list offsets and each
        # record carries an index and its value.
        form = ListOffsetForm(
            "i64",
            RecordForm(
                contents=[
                    NumpyForm("int64", form_key="node3"),
                    NumpyForm(A.dtype.numba_type.name, form_key="node4"),
                ],
                fields=["indices", "values"],
            ),
            form_key="node1",
        )
        d = {
            "node1-offsets": info["indptr"],
            "node3-data": info[indices],
            "node4-data": info["values"],
        }
        if format.startswith("hyper"):
            global _AwkwardDoublyCompressedMatrix
            # Define the mixin only once; later calls reuse the module-level class.
            if _AwkwardDoublyCompressedMatrix is None:
                # Define behaviors to make all fields function at the top-level
                @ak.behaviors.mixins.mixin_class(ak.behavior)
                class _AwkwardDoublyCompressedMatrix:
                    @property
                    def values(self):
                        return self.data.values

                    @property
                    def indices(self):
                        return self.data.indices

            # Hypersparse output has one entry per exported label rather than
            # one per row/column of the full matrix.
            size = len(info[labels])
            # Wrap the list layout in an outer record that adds the labels.
            form = RecordForm(
                contents=[
                    form,
                    NumpyForm("int64", form_key="node5"),
                ],
                fields=["data", "offset_labels"],
            )
            d["node5-data"] = info[labels]
            classname = "_AwkwardDoublyCompressedMatrix"

    else:
        raise TypeError(f"A must be a Matrix or Vector, found {type(A)}")

    ret = ak.from_buffers(form, size, d)
    # Record what from_awkward needs to rebuild the GraphBLAS object.
    ret = ak.with_parameter(ret, "format", format)
    ret = ak.with_parameter(ret, "shape", list(A.shape))
    if classname:
        # Tag the record with the mixin's name; presumably this is how the
        # top-level `values`/`indices` properties get attached -- TODO confirm.
        ret = ak.with_name(ret, classname)
    return ret


def mmread(source, *, dup_op=None, name=None):
"""Create a GraphBLAS Matrix from the contents of a Matrix Market file.
Expand Down
67 changes: 67 additions & 0 deletions graphblas/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
except ImportError: # pragma: no cover
ss = None

try:
import awkward._v2 as ak
except ImportError: # pragma: no cover
ak = None


@pytest.mark.skipif("not ss")
def test_vector_to_from_numpy():
Expand Down Expand Up @@ -302,3 +307,65 @@ def test_scipy_sparse():
assert sa.shape == M.shape
sa2 = gb.io.to_scipy_sparse(M, fmt)
assert (sa != sa2).nnz == 0


@pytest.mark.skipif("not ak")
def test_awkward_roundtrip():
    """Vectors and Matrices of several dtypes survive a trip through Awkward."""
    dtypes = ("int16", "float32", "bool")

    # Vector
    base_vector = gb.Vector.from_values([1, 3, 5], [20, 21, -5], size=22)
    for dtype in dtypes:
        expected = base_vector.dup(dtype=dtype)
        converted = gb.io.to_awkward(expected)
        assert isinstance(converted, ak.Array)
        restored = gb.io.from_awkward(converted)
        assert restored.isequal(expected)

    # Matrix
    base_matrix = gb.Matrix.from_values(
        [0, 0, 3, 5], [1, 4, 0, 2], [1, 0, 2, -1], nrows=7, ncols=6
    )
    # Same (dtype, format) visiting order as nested loops with dtype outermost.
    cases = [
        (dtype, fmt)
        for dtype in dtypes
        for fmt in ("csr", "csc", "hypercsr", "hypercsc")
    ]
    for dtype, fmt in cases:
        expected = base_matrix.dup(dtype=dtype)
        converted = gb.io.to_awkward(expected, format=fmt)
        assert isinstance(converted, ak.Array)
        restored = gb.io.from_awkward(converted)
        assert restored.isequal(expected)


@pytest.mark.skipif("not ak")
def test_awkward_iso_roundtrip():
    """Iso-valued Vectors and Matrices round-trip to equal objects."""
    # Vector
    iso_vector = gb.Vector.from_values([1, 3, 5], [20, 20, 20], size=22)
    assert iso_vector.ss.is_iso
    converted = gb.io.to_awkward(iso_vector)
    assert isinstance(converted, ak.Array)
    restored = gb.io.from_awkward(converted)
    assert restored.isequal(iso_vector)

    # Matrix
    iso_matrix = gb.Matrix.from_values(
        [0, 0, 3, 5], [1, 4, 0, 2], [1, 1, 1, 1], nrows=7, ncols=6
    )
    assert iso_matrix.ss.is_iso
    for fmt in ("csr", "csc", "hypercsr", "hypercsc"):
        converted = gb.io.to_awkward(iso_matrix, format=fmt)
        assert isinstance(converted, ak.Array)
        restored = gb.io.from_awkward(converted)
        assert restored.isequal(iso_matrix)


@pytest.mark.skipif("not ak")
def test_awkward_errors():
    """Invalid inputs to to_awkward / from_awkward raise the expected errors.

    All fixtures are built outside the ``pytest.raises`` blocks so that only
    the call under test can satisfy each expected exception. This matters
    because ``to_awkward`` can itself raise "Invalid format for ..." errors.
    """
    v = gb.Vector.from_values([1, 3, 5], [20, 20, 20], size=22)
    m = gb.Matrix.from_values([0, 0, 3, 5], [1, 4, 0, 2], [1, 1, 1, 1], nrows=7, ncols=6)

    # from_awkward: array without the required "format"/"shape" parameters
    plain = ak.Array([1, 2, 3])
    with pytest.raises(ValueError, match="Missing parameters"):
        gb.io.from_awkward(plain)

    # from_awkward: 1-d shape paired with a matrix format
    kv = gb.io.to_awkward(v)
    kv = ak.with_parameter(kv, "format", "csr")
    with pytest.raises(ValueError, match="Invalid format for Vector"):
        gb.io.from_awkward(kv)

    # from_awkward: 2-d shape paired with an unknown format
    km = gb.io.to_awkward(m)
    km = ak.with_parameter(km, "format", "dcsr")
    with pytest.raises(ValueError, match="Invalid format for Matrix"):
        gb.io.from_awkward(km)

    # to_awkward: format does not match the type of the input
    with pytest.raises(ValueError, match="Invalid format for Vector"):
        gb.io.to_awkward(v, format="csr")
    with pytest.raises(ValueError, match="Invalid format for Matrix"):
        gb.io.to_awkward(m, format="dcsr")

    # to_awkward: only Matrix and Vector are accepted
    scalar = gb.Scalar.from_value(5)
    with pytest.raises(TypeError):
        gb.io.to_awkward(scalar)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

extras_require = {
"repr": ["pandas"],
"io": ["networkx", "scipy >=1.7.0"],
"io": ["networkx", "scipy >=1.7.0", "awkward"],
"viz": ["matplotlib"],
"test": ["pytest", "pandas"],
}
Expand Down

0 comments on commit ef0cdcc

Please sign in to comment.