Skip to content

Commit

Permalink
Improve construction when inferring sub-array dtype (a.k.a. array sub…
Browse files Browse the repository at this point in the history
…dtype) (python-graphblas#381)

Improve construction when inferring sub-array dtype (a.k.a. array subdtype)
  • Loading branch information
eriknw authored Feb 4, 2023
1 parent e7a9a31 commit ed6f3ba
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ jobs:
npver=$(python -c 'import random ; print(random.choice(["=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", "=2.0.7", ""]))')
fi
if [[ ${{ steps.sourcetype.outputs.selected }} == "source" || ${{ steps.sourcetype.outputs.selected }} == "upstream" ]]; then
# TODO: there are currently issues with some numpy versions when
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repos:
- id: validate-pyproject
name: Validate pyproject.toml
- repo: https://github.com/myint/autoflake
rev: v2.0.0
rev: v2.0.1
hooks:
- id: autoflake
args: [--in-place]
Expand All @@ -44,7 +44,7 @@ repos:
- id: auto-walrus
args: [--line-length, "100"]
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
- id: black-jupyter
Expand All @@ -71,7 +71,7 @@ repos:
additional_dependencies: [tomli]
files: ^(graphblas|docs)/
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.237
rev: v0.0.241
hooks:
- id: ruff
- repo: https://github.com/sphinx-contrib/sphinx-lint
Expand Down
30 changes: 13 additions & 17 deletions graphblas/core/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def from_coo(
"""
rows = ints_to_numpy_buffer(rows, np.uint64, name="row indices")
columns = ints_to_numpy_buffer(columns, np.uint64, name="column indices")
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
# Compute nrows and ncols if not provided
if nrows is None:
if rows.size == 0:
Expand All @@ -882,11 +882,8 @@ def from_coo(
if columns.size == 0:
raise ValueError("No column indices provided. Unable to infer ncols.")
ncols = int(columns.max()) + 1
if dtype is None and values.ndim > 1:
# Look for array-subtdype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
# Create the new matrix
C = cls(new_dtype, nrows, ncols, name=name)
C = cls(dtype, nrows, ncols, name=name)
if values.ndim == 0:
if dup_op is not None:
raise ValueError(
Expand Down Expand Up @@ -1004,7 +1001,7 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
indices_name = "row indices"
indptr = ints_to_numpy_buffer(indptr, np.uint64, name="index pointers")
indices = ints_to_numpy_buffer(indices, np.uint64, name=indices_name)
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
if num is None:
if indices.size > 0:
num = int(indices.max()) + 1
Expand All @@ -1026,9 +1023,6 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
"ncols must be None or equal to len(indptr) - 1; "
f"expected {check_num}, got {ncols}"
)
if dtype is None and values.ndim > 1:
# Look for array-subtdype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
if values.ndim == 0:
if backend == "suitesparse":
# SuiteSparse GxB can handle iso-value
Expand All @@ -1055,21 +1049,21 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
)
values = np.broadcast_to(values, indices.size)
new_mat = ffi_new("GrB_Matrix*")
rv = Matrix._from_obj(new_mat, new_dtype, nrows, ncols, name=name)
if new_dtype._is_udt:
rv = Matrix._from_obj(new_mat, dtype, nrows, ncols, name=name)
if dtype._is_udt:
dtype_name = "UDT"
else:
dtype_name = new_dtype.name
dtype_name = dtype.name
call(
f"GrB_Matrix_import_{dtype_name}",
[
_Pointer(rv),
new_dtype,
dtype,
_as_scalar(nrows, _INDEX, is_cscalar=True),
_as_scalar(ncols, _INDEX, is_cscalar=True),
_CArray(indptr),
_CArray(indices),
_CArray(values, dtype=new_dtype),
_CArray(values, dtype=dtype),
_as_scalar(indptr.size, _INDEX, is_cscalar=True),
_as_scalar(indices.size, _INDEX, is_cscalar=True),
_as_scalar(values.shape[0], _INDEX, is_cscalar=True),
Expand Down Expand Up @@ -1436,12 +1430,14 @@ def from_dicts(
col_indices = np.fromiter(itertools.chain.from_iterable(dicts), np.uint64)
iter_values = itertools.chain.from_iterable(v.values() for v in dicts)
if dtype is None:
values = np.array(list(iter_values))
dtype = lookup_dtype(values.dtype)
values, dtype = values_to_numpy_buffer(list(iter_values), subarray_after=1)
else:
# If we know the dtype, then using `np.fromiter` is much faster
dtype = lookup_dtype(dtype)
values = np.fromiter(iter_values, dtype.np_type)
if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}:
values, dtype = values_to_numpy_buffer(list(iter_values), dtype)
else:
values = np.fromiter(iter_values, dtype.np_type)
return getattr(cls, methodname)(
*args, indptr, col_indices, values, dtype, nrows=nrows, ncols=ncols, name=name
)
Expand Down
6 changes: 3 additions & 3 deletions graphblas/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,7 +1313,7 @@ def _initialize(cls):
op._typed_ops[dtype] = typed_op
op.coercions[dtype] = target_type
# Allow some functions to work on UDTs
for (unop, func) in [
for unop, func in [
(unary.identity, _identity),
(unary.one, _one),
]:
Expand Down Expand Up @@ -2287,7 +2287,7 @@ def _initialize(cls):
# If the inputs are FP32, we use DIV_FP32; use DIV_FP64 for all other input dtypes
truediv = binary.truediv = op.truediv = BinaryOp("truediv")
rtruediv = binary.rtruediv = op.rtruediv = BinaryOp("rtruediv")
for (new_op, builtin_op) in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]:
for new_op, builtin_op in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]:
for dtype in builtin_op.types:
if dtype.name in {"FP32", "FC32", "FC64"}:
orig_dtype = dtype
Expand Down Expand Up @@ -2420,7 +2420,7 @@ def _initialize(cls):
left._semiring_commutes_to = right
right._semiring_commutes_to = left
# Allow some functions to work on UDTs
for (binop, func) in [
for binop, func in [
(binary.first, _first),
(binary.second, _second),
(binary.pair, _pair),
Expand Down
34 changes: 25 additions & 9 deletions graphblas/core/ss/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,9 @@ def _import_csr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if col_indices is values:
values = np.copy(values)
Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr))
Expand Down Expand Up @@ -1493,7 +1495,9 @@ def _import_csc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if row_indices is values:
values = np.copy(values)
Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr))
Expand Down Expand Up @@ -1696,7 +1700,9 @@ def _import_hypercsr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if not is_iso and values.ndim == 0:
is_iso = True
if col_indices is values:
Expand Down Expand Up @@ -1917,7 +1923,9 @@ def _import_hypercsc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if row_indices is values:
values = np.copy(values)
if not is_iso and values.ndim == 0:
Expand Down Expand Up @@ -2122,7 +2130,9 @@ def _import_bitmapr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="C")
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, order="C", subarray_after=2
)
if bitmap is values:
values = np.copy(values)
if method == "import":
Expand Down Expand Up @@ -2313,7 +2323,9 @@ def _import_bitmapc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="F")
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, order="F", subarray_after=2
)
if bitmap is values:
values = np.copy(values)
if method == "import":
Expand Down Expand Up @@ -2486,7 +2498,9 @@ def _import_fullr(
copy = not take_ownership
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="C", ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, order="C", ownable=True, subarray_after=2
)
if method == "import":
nrows, ncols = get_shape(nrows, ncols, dtype, values=values)
else:
Expand Down Expand Up @@ -2643,7 +2657,9 @@ def _import_fullc(
copy = not take_ownership
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="F", ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, order="F", ownable=True, subarray_after=2
)
if method == "import":
nrows, ncols = get_shape(nrows, ncols, dtype, values=values)
else:
Expand Down Expand Up @@ -2848,7 +2864,7 @@ def _import_coo(

if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
if method == "import":
matrix = gb.Matrix(dtype, nrows=nrows, ncols=ncols, name=name)
if is_iso:
Expand Down
12 changes: 9 additions & 3 deletions graphblas/core/ss/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,9 @@ def _import_sparse(
indices = ints_to_numpy_buffer(indices, np.uint64, copy=copy, ownable=True, name="indices")
if method == "pack":
dtype = vector.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if indices is values:
values = np.copy(values)
vi = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indices))
Expand Down Expand Up @@ -1150,7 +1152,9 @@ def _import_bitmap(
if method == "pack":
dtype = vector.dtype
size = vector._size
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if bitmap is values:
values = np.copy(values)
vhandle = ffi_new("GrB_Vector*")
Expand Down Expand Up @@ -1320,7 +1324,9 @@ def _import_full(
if method == "pack":
dtype = vector.dtype
size = vector._size
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
vhandle = ffi_new("GrB_Vector*")
vx = ffi_new("void**", ffi.from_buffer("void*", values))
if size is None:
Expand Down
18 changes: 17 additions & 1 deletion graphblas/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,21 @@ def _get_subdtype(dtype):
return dtype


def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, order="C"):
def values_to_numpy_buffer(
array, dtype=None, *, copy=False, ownable=False, order="C", subarray_after=None
):
"""Convert an array-like object to a numpy array and infer the dtype if necessary.
Parameters
----------
subarray_after : int, optional
If dtype is not provided, infer "sub-array" dtype if the array has extra dimensions.
Returns
-------
np.ndarray
dtype
"""
if dtype is not None:
dtype = lookup_dtype(dtype)
array = np.array(array, _get_subdtype(dtype.np_type), copy=copy, order=order)
Expand All @@ -85,6 +99,8 @@ def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, orde
# fix for win64 numpy handling of ints
array = array.astype(np.int64)
dtype = lookup_dtype(array.dtype)
if subarray_after is not None and array.ndim > subarray_after:
dtype = lookup_dtype(np.dtype((dtype.np_type, array.shape[subarray_after:])))
if ownable and (not array.flags.owndata or not array.flags.writeable):
array = array.copy(order)
return array, dtype
Expand Down
15 changes: 7 additions & 8 deletions graphblas/core/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,17 +725,14 @@ def from_coo(cls, indices, values=1.0, dtype=None, *, size=None, dup_op=None, na
Vector
"""
indices = ints_to_numpy_buffer(indices, np.uint64, name="indices")
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
# Compute size if not provided
if size is None:
if indices.size == 0:
raise ValueError("No indices provided. Unable to infer size.")
size = int(indices.max()) + 1
if dtype is None and values.ndim > 1:
# Look for array-subtdype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
# Create the new vector
w = cls(new_dtype, size, name=name)
w = cls(dtype, size, name=name)
if values.ndim == 0:
if dup_op is not None:
raise ValueError(
Expand Down Expand Up @@ -1791,12 +1788,14 @@ def from_dict(cls, d, dtype=None, *, size=None, name=None):
"""
indices = np.fromiter(d.keys(), np.uint64)
if dtype is None:
values = np.array(list(d.values())) # let numpy infer dtype
dtype = lookup_dtype(values.dtype)
values, dtype = values_to_numpy_buffer(list(d.values()), subarray_after=1)
else:
# If we know the dtype, then using `np.fromiter` is much faster
dtype = lookup_dtype(dtype)
values = np.fromiter(d.values(), dtype.np_type)
if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}:
values, dtype = values_to_numpy_buffer(list(d.values()), dtype)
else:
values = np.fromiter(d.values(), dtype.np_type)
if size is None and indices.size == 0:
size = 0
return cls.from_coo(indices, values, dtype, size=size, name=name)
Expand Down
35 changes: 35 additions & 0 deletions graphblas/tests/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3612,6 +3612,7 @@ def test_ss_iteration(A):
assert next(A.ss.iteritems()) is not None


@pytest.mark.slow
def test_udt():
record_dtype = np.dtype([("x", np.bool_), ("y", np.float64)], align=True)
udt = dtypes.register_anonymous(record_dtype, "MatrixUDT")
Expand Down Expand Up @@ -4231,3 +4232,37 @@ def test_ss_descriptors(A):
else:
with pytest.raises(ValueError, match="escriptor"):
(A @ A).new(nthreads=4, axb_method="dot", sort=True)


def test_subarray_dtypes():
    """Check that sub-array dtypes are inferred when constructing a Matrix.

    A values array with one extra trailing dimension (shape ``(nvals, 4)``)
    is expected to produce an ``INT64[4]`` matrix, and that dtype is expected
    to survive a round-trip through each export/import format.
    """
    a = np.arange(3 * 4, dtype=np.int64).reshape(3, 4)
    A = Matrix.from_coo([1, 3, 5], [0, 1, 3], a)
    # Build the expected matrix element-by-element with an explicit dtype.
    B = Matrix("INT64[4]", nrows=6, ncols=4)
    B[1, 0] = [0, 1, 2, 3]
    B[3, 1] = [4, 5, 6, 7]
    B[5, 3] = [8, 9, 10, 11]
    assert A.isequal(B, check_dtype=True)

    # Round-trip through every to_*/from_* pair.  Each result must be checked
    # inside the loop; otherwise `B` is silently overwritten by the next
    # iteration (or by the `from_dicts` call below) and nothing is verified.
    for method in ["coo", "csr", "csc", "dcsr", "dcsc", "edgelist"]:
        B = getattr(A, f"from_{method}")(*getattr(A, f"to_{method}")())
        assert A.isequal(B, check_dtype=True)

    # `from_dicts` takes a different path depending on whether dtype is given.
    B = Matrix.from_dicts(A.to_dicts())
    assert A.isequal(B, check_dtype=True)
    B = Matrix.from_dicts(A.to_dicts(), A.dtype)
    assert A.isequal(B, check_dtype=True)

    # Same 24 values laid out two ways: one row per stored element (6, 4),
    # and as a dense 2x3 grid of length-4 sub-arrays (2, 3, 4).
    b1 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2 * 3, 4)
    b2 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2, 3, 4)
    Full1 = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], b1)
    Full2 = Matrix("INT64[4]", nrows=2, ncols=3)
    Full2[0, 0] = [0, 1, 2, 3]
    Full2[0, 1] = [4, 5, 6, 7]
    Full2[0, 2] = [8, 9, 10, 11]
    Full2[1, 0] = [12, 13, 14, 15]
    Full2[1, 1] = [16, 17, 18, 19]
    Full2[1, 2] = [20, 21, 22, 23]
    assert Full1.isequal(Full2, check_dtype=True)

    # Assigning the full 3-D array at once must give the same result.
    Full2 = Matrix("INT64[4]", nrows=2, ncols=3)
    Full2[:, :] = b2
    assert Full1.isequal(Full2, check_dtype=True)

    if suitesparse:
        # Dense import receives a 3-D array; the trailing dimension should be
        # treated as the sub-array rather than as an error.
        Full2 = Matrix.ss.import_fullr(b2)
        assert Full1.isequal(Full2, check_dtype=True)
Loading

0 comments on commit ed6f3ba

Please sign in to comment.