diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4eb2db4d0..04a837493 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,7 +56,7 @@ repos: # These versions need updated manually - flake8==6.0.0 - flake8-comprehensions==3.10.1 - - flake8-bugbear==23.1.20 + - flake8-bugbear==23.2.13 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa rev: v1.4.0 @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.241 + rev: v0.0.247 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/agg/__init__.py b/graphblas/agg/__init__.py index 1447dd932..f2dddb851 100644 --- a/graphblas/agg/__init__.py +++ b/graphblas/agg/__init__.py @@ -91,6 +91,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.agg.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/binary/__init__.py b/graphblas/binary/__init__.py index 089869eb7..e59c0405e 100644 --- a/graphblas/binary/__init__.py +++ b/graphblas/binary/__init__.py @@ -24,6 +24,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. 
" f"`gb.binary.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/core/automethods.py b/graphblas/core/automethods.py index 5b8fb5726..d370469e7 100644 --- a/graphblas/core/automethods.py +++ b/graphblas/core/automethods.py @@ -261,6 +261,10 @@ def to_dcsr(self): return self._get_value("to_dcsr") +def to_dense(self): + return self._get_value("to_dense") + + def to_dict(self): return self._get_value("to_dict") @@ -389,6 +393,7 @@ def _main(): "reposition", "ss", "to_coo", + "to_dense", "to_values", } vector = { diff --git a/graphblas/core/infix.py b/graphblas/core/infix.py index 22b1c5dca..1fc7caa95 100644 --- a/graphblas/core/infix.py +++ b/graphblas/core/infix.py @@ -208,6 +208,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) @@ -343,6 +344,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 6c0419c98..8b9b4b678 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -28,6 +28,7 @@ 
class_property, get_order, ints_to_numpy_buffer, + normalize_values, output_type, values_to_numpy_buffer, wrapdoc, @@ -472,6 +473,7 @@ def to_values(self, dtype=None, *, rows=True, columns=True, values=True, sort=Tr warnings.warn( "`Matrix.to_values(...)` is deprecated; please use `Matrix.to_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return self.to_coo(dtype, rows=rows, columns=columns, values=values, sort=sort) @@ -496,6 +498,7 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True) See Also -------- + to_dense to_edgelist from_coo @@ -528,11 +531,7 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True) [c_rows, c_columns, c_values, _Pointer(scalar), self], ) if values: - c_values = c_values.array - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - c_values = c_values.astype(dtype.np_type) # copies + c_values = normalize_values(self, c_values.array, dtype) if sort and backend != "suitesparse": col = c_columns.array row = c_rows.array @@ -567,6 +566,7 @@ def to_edgelist(self, dtype=None, *, values=True, sort=True): See Also -------- to_coo + to_dense from_edgelist Returns @@ -811,6 +811,7 @@ def from_values( warnings.warn( "`Matrix.from_values(...)` is deprecated; please use `Matrix.from_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return cls.from_coo( rows, columns, values, dtype, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name @@ -856,6 +857,7 @@ def from_coo( See Also -------- + from_dense from_edgelist to_coo @@ -935,6 +937,7 @@ def from_edgelist( See Also -------- from_coo + from_dense to_edgelist Returns @@ -1042,7 +1045,7 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name): ) values = np.broadcast_to(values, indices.size) new_mat = ffi_new("GrB_Matrix*") - rv = Matrix._from_obj(new_mat, dtype, nrows, ncols, name=name) + rv = cls._from_obj(new_mat, dtype, nrows, ncols, name=name) if dtype._is_udt: dtype_name = 
"UDT" else: @@ -1328,30 +1331,183 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def _from_dense(cls, values, dtype=None, *, name=None): - """Create a new Matrix from a dense numpy array.""" - # TODO: GraphBLAS needs a way to import or assign dense - # We could also handle F-contiguous data w/o a copy - # TODO: handle `Matrix._from_dense(np.arange(3*4*5).reshape(3, 4, 5))` as 3x4 Matrix - if backend == "suitesparse": # pragma: no cover (unused) - return Matrix.ss.import_fullr(values, dtype=dtype, name=name) - values, dtype = values_to_numpy_buffer(values, dtype) - if values.ndim < 2: - raise ValueError("A 2d array is required to create a dense Matrix") - if dtype.np_type.subdtype is not None and values.ndim < 3: + def from_scalar(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): + """Create a fully dense Matrix filled with a scalar value. + + For SuiteSparse:GraphBLAS backend, this creates an iso-valued full Matrix + that stores a single value regardless of the shape of the Matrix, so large + matrices created by ``Matrix.from_scalar`` will use very low memory. + + If instead you want to create a new iso-valued Matrix with the same structure + as an existing Matrix, you may do: ``C = binary.second(A, value).new()``. + + Parameters + ---------- + value : scalar + Scalar value used to fill the Matrix. + nrows : int + Number of rows. + ncols : int + Number of columns. + dtype : DataType, optional + Data type of the Matrix. If not provided, the scalar value will be + inspected to choose an appropriate dtype. + name : str, optional + Name to give the Matrix. 
+ + See Also + -------- + from_coo + from_dense + from_edgelist + + Returns + ------- + Matrix + """ + if type(value) is not Scalar: + try: + value = Scalar.from_value(value, dtype, is_cscalar=None, name="") + except TypeError: + value = cls()._expect_type( + value, + Scalar, + within="from_scalar", + keyword_name="value", + extra_message="Literal scalars also accepted.", + ) + dtype = value.dtype + elif dtype is None: + dtype = value.dtype + else: + dtype = lookup_dtype(dtype) + if backend == "suitesparse" and not dtype._is_udt: + # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs + return cls.ss.import_fullr( + value, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name + ) + rv = cls(dtype, nrows=nrows, ncols=ncols, name=name) + rv(**opts) << value + return rv + + @classmethod + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts): + """Create a Matrix from a NumPy array or list of lists. + + Parameters + ---------- + values : list or np.ndarray + List of values. + missing_value : scalar, optional + A scalar value to consider "missing"; elements of this value will be dropped. + If None, then the resulting Matrix will be dense. + dtype : DataType, optional + Data type of the Matrix. If not provided, the values will be inspected + to choose an appropriate dtype. + name : str, optional + Name to give the Matrix. + + See Also + -------- + from_coo + from_edgelist + from_scalar + to_dense + + Returns + ------- + Matrix + """ + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=2) + if values.ndim == 0: + raise TypeError( + "values must be an array or list, not a scalar. " + "To create a dense Matrix from a scalar, use `Matrix.from_scalar`." 
+ ) + if values.ndim == 1: + raise ValueError("A 2d array or scalar is required to create a dense Matrix") + if values.ndim == 2 and dtype.np_type.subdtype is not None: raise ValueError("A >2d array is required to create a dense Matrix with subdtype") - nrows, ncols, *rest = values.shape - cols, rows = np.meshgrid( - np.arange(ncols, dtype=np.uint64), - np.arange(nrows, dtype=np.uint64), - ) - rows = rows.ravel() - cols = cols.ravel() - if values.ndim > 2: - values = values.reshape([nrows * ncols, *rest]) + if values.ndim > 2 and dtype.np_type.subdtype is None: + raise ValueError(f"values array must be 2d to create dense Matrix with dtype {dtype}") + if backend == "suitesparse": + # Should we try to handle F-contiguous data w/o a copy? + rv = cls.ss.import_fullr(values, dtype=dtype, name=name) else: - values = values.ravel() - return cls.from_coo(rows, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) + nrows, ncols, *rest = values.shape + indptr = np.arange(0, nrows * ncols + 1, ncols, dtype=np.uint64) + cols = np.repeat(np.arange(ncols, dtype=np.uint64)[None, :], nrows, 0).ravel() + if rest: # sub-array dtype + values = values.reshape(nrows * ncols, *rest) + else: + values = values.ravel() + rv = cls.from_csr( + indptr, + cols, + values, + dtype, + ncols=ncols, + name=name, + ) + if missing_value is not None: + rv(**opts) << select.valuene(rv, missing_value) + return rv + + def to_dense(self, fill_value=None, dtype=None, **opts): + """Convert Matrix to NumPy array of the same shape with missing values filled. + + .. warning:: + This can create very large arrays that require a lot of memory; please use caution. + + Parameters + ---------- + fill_value : scalar, optional + Value used to fill missing values. This is required if there are missing values. + dtype : DataType, optional + Requested dtype for the output values array. 
+ + See Also + -------- + to_coo + to_dicts + to_edgelist + from_dense + + Returns + ------- + np.ndarray + """ + max_nvals = self._nrows * self._ncols + if fill_value is None or self._nvals == max_nvals: + if self._nvals != max_nvals: + raise TypeError( + "fill_value must be given in `to_dense` when there are missing values" + ) + if backend == "suitesparse": + info = self.ss.export("fullr") + return normalize_values(self, info["values"], dtype, self.shape, info["is_iso"]) + values = self.to_csr(dtype, sort=True)[2] + return values.reshape(self._nrows, self._ncols, *values.shape[1:]) + + if dtype is None and not self.dtype._is_udt: + # dtype of fill_value can upcast the dtype + if type(fill_value) is not Scalar: + try: + fill_value = Scalar.from_value(fill_value, is_cscalar=None, name="") + except TypeError: + fill_value = self._expect_type( + fill_value, + Scalar, + within="to_dense", + keyword_name="fill_value", + extra_message="Literal scalars also accepted.", + ) + dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) + + rv = self.dup(dtype, clear=True, name="to_dense", **opts) + rv(**opts) << fill_value + rv(self.S, **opts) << self + return rv.to_dense(**opts) @classmethod def from_dicts( @@ -1435,7 +1591,7 @@ def from_dicts( *args, indptr, col_indices, values, dtype, nrows=nrows, ncols=ncols, name=name ) - def _to_csx(self, fmt, dtype=None): + def _to_csx(self, fmt, dtype, sort): Ap_len = _scalar_index("Ap_len") Ai_len = _scalar_index("Ai_len") Ax_len = _scalar_index("Ax_len") @@ -1464,12 +1620,24 @@ def _to_csx(self, fmt, dtype=None): Ax = Ax[:Ax_len] if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - Ax = Ax.astype(dtype.np_type) + Ax = normalize_values(self, Ax, dtype) + if sort: + # indices may not be sorted within each Ai (i.e., row), so sort them + num = self._ncols if fmt is _CSR_FORMAT else self._nrows + if Ap[-1] == self._ncols * self._nrows: + # Fully dense matrix + indices = np.argsort(Ai + 
np.repeat(Ap[:-1], num)) + else: + offsets = np.repeat( + np.arange(0, (Ap.size - 1) * num, num, dtype=np.uint64), + np.diff(Ap.astype(np.int64)), + ) + indices = np.argsort(Ai + offsets) + Ai = Ai[indices] + Ax = Ax[indices] return Ap, Ai, Ax - def to_csr(self, dtype=None): + def to_csr(self, dtype=None, *, sort=True): """Returns three arrays of the standard CSR representation: indptr, col_indices, values. In CSR, the column indices for row i are stored in ``col_indices[indptr[i]:indptr[i+1]]`` @@ -1493,9 +1661,14 @@ def to_csr(self, dtype=None): Matrix.ss.export io.to_scipy_sparse """ - return self._to_csx(_CSR_FORMAT, dtype) + if backend == "suitesparse": + info = self.ss.export("csr", sort=sort) + cols = info["col_indices"] + values = normalize_values(self, info["values"], dtype, (cols.size,), info["is_iso"]) + return info["indptr"], cols, values + return self._to_csx(_CSR_FORMAT, dtype, sort) - def to_csc(self, dtype=None): + def to_csc(self, dtype=None, *, sort=True): """Returns three arrays of the standard CSC representation: indptr, row_indices, values. In CSC, the row indices for column i are stored in ``row_indices[indptr[i]:indptr[i+1]]`` @@ -1519,9 +1692,14 @@ def to_csc(self, dtype=None): Matrix.ss.export io.to_scipy_sparse """ - return self._to_csx(_CSC_FORMAT, dtype) + if backend == "suitesparse": + info = self.ss.export("csc", sort=sort) + rows = info["row_indices"] + values = normalize_values(self, info["values"], dtype, (rows.size,), info["is_iso"]) + return info["indptr"], rows, values + return self._to_csx(_CSC_FORMAT, dtype, sort) - def to_dcsr(self, dtype=None): + def to_dcsr(self, dtype=None, *, sort=True): """Returns four arrays of DCSR representation: compressed_rows, indptr, col_indices, values. In DCSR, we store the index of each non-empty row in ``compressed_rows``. 
@@ -1549,26 +1727,21 @@ def to_dcsr(self, dtype=None): io.to_scipy_sparse """ if backend == "suitesparse": - info = self.ss.export("hypercsr", sort=True) + info = self.ss.export("hypercsr", sort=sort) compressed_rows = info["rows"] indptr = info["indptr"] cols = info["col_indices"] - values = info["values"] - if info["is_iso"]: - values = np.broadcast_to(values, cols.size) + values = normalize_values(self, info["values"], dtype, (cols.size,), info["is_iso"]) else: - rows, cols, values = self.to_coo() # sorted by row then col + rows, cols, values = self.to_coo(sort=True) # sorted by row then col compressed_rows, indices = np.unique(rows, return_index=True) indptr = np.empty(indices.size + 1, np.uint64) indptr[:-1] = indices indptr[-1] = rows.size - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - values = values.astype(dtype.np_type) + values = normalize_values(self, values, dtype) return compressed_rows, indptr, cols, values - def to_dcsc(self, dtype=None): + def to_dcsc(self, dtype=None, *, sort=True): """Returns four arrays of DCSC representation: compressed_cols, indptr, row_indices, values. In DCSC, we store the index of each non-empty column in ``compressed_cols``. 
@@ -1596,13 +1769,11 @@ def to_dcsc(self, dtype=None): io.to_scipy_sparse """ if backend == "suitesparse": - info = self.ss.export("hypercsc", sort=True) + info = self.ss.export("hypercsc", sort=sort) compressed_cols = info["cols"] indptr = info["indptr"] rows = info["row_indices"] - values = info["values"] - if info["is_iso"]: - values = np.broadcast_to(values, rows.size) + values = normalize_values(self, info["values"], dtype, (rows.size,), info["is_iso"]) else: rows, cols, values = self.to_coo(sort=False) ind = np.lexsort((rows, cols)) # sort by columns, then rows @@ -1613,10 +1784,7 @@ def to_dcsc(self, dtype=None): indptr = np.empty(indices.size + 1, np.uint64) indptr[:-1] = indices indptr[-1] = cols.size - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - values = values.astype(dtype.np_type) + values = normalize_values(self, values, dtype) return compressed_cols, indptr, rows, values def to_dicts(self, order="rowwise"): @@ -2274,8 +2442,8 @@ def select(self, op, thunk=None): None, [self, mask, _select_mask, (self, mask)], # [*expr_args, func, args] expr_repr="{0.name}.select({1.name})", - nrows=self.nrows, - ncols=self.ncols, + nrows=self._nrows, + ncols=self._ncols, dtype=self.dtype, ) @@ -2819,10 +2987,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o except (TypeError, ValueError): if rowsize is not None or colsize is not None: try: - # Do a copy for suitesparse so we can give ownership to suitesparse - values, dtype = values_to_numpy_buffer( - value, dtype, copy=backend == "suitesparse" - ) + values, dtype = values_to_numpy_buffer(value, dtype) except Exception: pass else: @@ -2836,15 +3001,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[i, J](m) << [1, 2, 3] expected_shape = (rowsize or colsize,) try: - if backend == "suitesparse": - vals = Vector.ss.import_full( - values, dtype=dtype, take_ownership=True - ) - else: - # TODO: GraphBLAS needs a 
way to import or assign dense - vals = Vector.from_coo( - np.arange(shape[0]), values, dtype, size=shape[0] - ) + vals = Vector.from_dense(values, dtype=dtype) except Exception: # pragma: no cover (safety) vals = None else: @@ -2856,12 +3013,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[I, J](M) << [[1, 2, 3], [4, 5, 6]] expected_shape = (rowsize, colsize) try: - if backend == "suitesparse": - vals = Matrix.ss.import_fullr( - values, dtype=dtype, take_ownership=True - ) - else: - vals = Matrix._from_dense(values, dtype) + vals = Matrix.from_dense(values, dtype=dtype) except Exception: vals = None else: @@ -3219,6 +3371,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) @@ -3318,6 +3471,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) @@ -3419,20 +3573,25 @@ def _name_html(self): return f"{self._matrix._name_html}.T" @wrapdoc(Matrix.to_csr) - def to_csr(self, dtype=None): - return self._matrix.to_csc(dtype) + def to_csr(self, 
dtype=None, *, sort=True): + return self._matrix.to_csc(dtype, sort=sort) @wrapdoc(Matrix.to_csc) - def to_csc(self, dtype=None): - return self._matrix.to_csr(dtype) + def to_csc(self, dtype=None, *, sort=True): + return self._matrix.to_csr(dtype, sort=sort) @wrapdoc(Matrix.to_dcsr) - def to_dcsr(self, dtype=None): - return self._matrix.to_dcsc(dtype) + def to_dcsr(self, dtype=None, *, sort=True): + return self._matrix.to_dcsc(dtype, sort=sort) @wrapdoc(Matrix.to_dcsc) - def to_dcsc(self, dtype=None): - return self._matrix.to_dcsr(dtype) + def to_dcsc(self, dtype=None, *, sort=True): + return self._matrix.to_dcsr(dtype, sort=sort) + + @wrapdoc(Matrix.to_dense) + def to_dense(self, fill_value=None, dtype=None, **opts): + rv = self._matrix.to_dense(fill_value, dtype, **opts) + return rv.swapaxes(0, 1) @wrapdoc(Matrix.to_dicts) def to_dicts(self, order="rowwise"): diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index 6c4809a83..b455d760e 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -559,7 +559,7 @@ def export(self, format=None, *, sort=False, give_ownership=False, raw=False, ** "coo", "coor", or "cooc". give_ownership : bool, default False Perform a zero-copy data transfer to Python if possible. This gives ownership of - the underlying memory buffers to Numpy. + the underlying memory buffers to NumPy. ** If True, this nullifies the current object, which should no longer be used! ** raw : bool, default False If True, always return 1d arrays the same size as returned by SuiteSparse. 
@@ -3733,6 +3733,7 @@ def scan_columnwise(self, op=monoid.plus, *, name=None, **opts): "`Matrix.ss.scan_columnwise` is deprecated; " 'please use `Matrix.ss.scan(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) return prefix_scan(self._parent.T, op, name=name, within="scan_columnwise", **opts) @@ -3754,6 +3755,7 @@ def scan_rowwise(self, op=monoid.plus, *, name=None, **opts): warnings.warn( "`Matrix.ss.scan_rowwise` is deprecated; please use `Matrix.ss.scan` instead.", DeprecationWarning, + stacklevel=2, ) return prefix_scan(self._parent, op, name=name, within="scan_rowwise", **opts) @@ -3920,6 +3922,7 @@ def selectk_rowwise(self, how, k, *, name=None): # pragma: no cover (deprecated warnings.warn( "`Matrix.ss.selectk_rowwise` is deprecated; please use `Matrix.ss.selectk` instead.", DeprecationWarning, + stacklevel=2, ) how = how.lower() fmt = "hypercsr" @@ -3966,6 +3969,7 @@ def selectk_columnwise(self, how, k, *, name=None): # pragma: no cover (depreca "`Matrix.ss.selectk_columnwise` is deprecated; " 'please use `Matrix.ss.selectk(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) how = how.lower() fmt = "hypercsc" @@ -4090,6 +4094,7 @@ def compactify_rowwise( "`Matrix.ss.compactify_rowwise` is deprecated; " "please use `Matrix.ss.compactify` instead.", DeprecationWarning, + stacklevel=2, ) return self._compactify( how, reverse, asindex, "ncols", ncols, "hypercsr", "col_indices", name @@ -4132,6 +4137,7 @@ def compactify_columnwise( "`Matrix.ss.compactify_columnwise` is deprecated; " 'please use `Matrix.ss.compactify(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) return self._compactify( how, reverse, asindex, "nrows", nrows, "hypercsc", "row_indices", name diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index 9635e8fb9..d13d78ac3 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -425,7 +425,7 @@ def export(self, format=None, *, sort=False, 
give_ownership=False, raw=False, ** Whether to sort indices if the format is "sparse" give_ownership : bool, default False Perform a zero-copy data transfer to Python if possible. This gives ownership of - the underlying memory buffers to Numpy. + the underlying memory buffers to NumPy. ** If True, this nullifies the current object, which should no longer be used! ** raw : bool, default False If True, always return array the same size as returned by SuiteSparse. diff --git a/graphblas/core/utils.py b/graphblas/core/utils.py index b09f71713..83fa15cd5 100644 --- a/graphblas/core/utils.py +++ b/graphblas/core/utils.py @@ -106,6 +106,22 @@ def values_to_numpy_buffer( return array, dtype +def normalize_values(self, values, dtype, shape=None, is_iso=False): + """Expand and/or update dtype of values array.""" + if dtype is not None: + dtype = lookup_dtype(dtype) + if dtype != self.dtype: + values = values.astype(dtype.np_type) # copies + else: + dtype = self.dtype + if is_iso: + if dtype.np_type.subdtype is None: + values = np.broadcast_to(values, shape) + else: + values = np.broadcast_to(values, shape + values.shape) + return values + + def get_shape(nrows, ncols, dtype=None, **arrays): if nrows is None or ncols is None: # Get nrows and ncols from the first 2d array @@ -367,4 +383,7 @@ def _autogenerate_code( f.write(new_text) import subprocess - subprocess.check_call(["black", filename]) + try: + subprocess.check_call(["black", filename]) + except FileNotFoundError: # pragma: no cover (safety) + pass # It's okay if `black` isn't installed; pre-commit hooks will do linting diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index ee1868cf6..dd183d856 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -26,6 +26,7 @@ _Pointer, class_property, ints_to_numpy_buffer, + normalize_values, output_type, values_to_numpy_buffer, wrapdoc, @@ -433,6 +434,7 @@ def to_values(self, dtype=None, *, indices=True, values=True, sort=True): warnings.warn( 
"`Vector.to_values(...)` is deprecated; please use `Vector.to_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return self.to_coo(dtype, indices=indices, values=values, sort=sort) @@ -452,6 +454,7 @@ def to_coo(self, dtype=None, *, indices=True, values=True, sort=True): See Also -------- + to_dense to_dict from_coo @@ -478,11 +481,7 @@ def to_coo(self, dtype=None, *, indices=True, values=True, sort=True): f"GrB_Vector_extractTuples_{dtype_name}", [c_indices, c_values, _Pointer(scalar), self] ) if values: - c_values = c_values.array - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - c_values = c_values.astype(dtype.np_type) # copies + c_values = normalize_values(self, c_values.array, dtype) if sort and backend != "suitesparse": c_indices = c_indices.array ind = np.argsort(c_indices) @@ -682,6 +681,7 @@ def from_values(cls, indices, values, dtype=None, *, size=None, dup_op=None, nam warnings.warn( "`Vector.from_values(...)` is deprecated; please use `Vector.from_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) @@ -709,6 +709,7 @@ def from_coo(cls, indices, values=1.0, dtype=None, *, size=None, dup_op=None, na See Also -------- + from_dense from_dict from_pairs to_coo @@ -766,6 +767,7 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): See Also -------- from_coo + from_dense from_dict to_coo @@ -788,6 +790,172 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): ) return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) + @classmethod + def from_scalar(cls, value, size, dtype=None, *, name=None, **opts): + """Create a fully dense Vector filled with a scalar value. 
+ + For SuiteSparse:GraphBLAS backend, this creates an iso-valued full Vector + that stores a single value regardless of the size of the Vector, so large + vectors created by ``Vector.from_scalar`` will use very low memory. + + If instead you want to create a new iso-valued Vector with the same structure + as an existing Vector, you may do: ``w = binary.second(v, value).new()``. + + Parameters + ---------- + value : scalar + Scalar value used to fill the Vector. + size : int + Size of the Vector; i.e., the number of elements it holds. + Every element is set to ``value``, so the resulting Vector + has no missing values. + dtype : DataType, optional + Data type of the Vector. If not provided, the scalar value will be + inspected to choose an appropriate dtype. + name : str, optional + Name to give the Vector. + + See Also + -------- + from_coo + from_dense + from_dict + from_pairs + + Returns + ------- + Vector + """ + if type(value) is not Scalar: + try: + value = Scalar.from_value(value, dtype, is_cscalar=None, name="") + except TypeError: + value = cls()._expect_type( + value, + Scalar, + within="from_scalar", + keyword_name="value", + extra_message="Literal scalars also accepted.", + ) + dtype = value.dtype + elif dtype is None: + dtype = value.dtype + else: + dtype = lookup_dtype(dtype) + if backend == "suitesparse" and not dtype._is_udt: + # `Vector.ss.import_full` does not yet handle all cases with UDTs + return cls.ss.import_full(value, dtype=dtype, size=size, is_iso=True, name=name) + rv = cls(dtype, size, name=name) + rv(**opts) << value + return rv + + @classmethod + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts): + """Create a Vector from a NumPy array or list. + + Parameters + ---------- + values : list or np.ndarray + List of values. + missing_value : scalar, optional + A scalar value to consider "missing"; elements of this value will be dropped. + If None, then the resulting Vector will be dense. + dtype : DataType, optional + Data type of the Vector. 
If not provided, the values will be inspected + to choose an appropriate dtype. + name : str, optional + Name to give the Vector. + + See Also + -------- + from_coo + from_dict + from_pairs + from_scalar + to_dense + + Returns + ------- + Vector + """ + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) + if values.ndim == 0: + raise TypeError( + "values must be an array or list, not a scalar. " + "To create a dense Vector from a scalar, use `Vector.from_scalar`." + ) + if values.ndim == 1 and dtype.np_type.subdtype is not None: + raise ValueError("A >1d array is required to create a dense Vector with subdtype") + if values.ndim > 1 and dtype.np_type.subdtype is None: + raise ValueError(f"values array must be 1d to create dense Vector with dtype {dtype}") + if backend == "suitesparse": + rv = cls.ss.import_full(values, dtype=dtype, name=name) + else: + # TODO: GraphBLAS needs a better way to import or assign dense + rv = cls.from_coo( + np.arange(values.shape[0], dtype=np.uint64), + values, + dtype, + size=values.shape[0], + name=name, + ) + if missing_value is not None: + rv(**opts) << select.valuene(rv, missing_value) + return rv + + def to_dense(self, fill_value=None, dtype=None, **opts): + """Convert Vector to NumPy array of the same shape with missing values filled. + + .. warning:: + This can create very large arrays that require a lot of memory; please use caution. + + Parameters + ---------- + fill_value : scalar, optional + Value used to fill missing values. This is required if there are missing values. + dtype : DataType, optional + Requested dtype for the output values array. 
+ + See Also + -------- + to_coo + to_dict + from_dense + + Returns + ------- + np.ndarray + """ + if fill_value is None or self._nvals == self._size: + if self._nvals != self._size: + raise TypeError( + "fill_value must be given in `to_dense` when there are missing values" + ) + if backend == "suitesparse": + info = self.ss.export("full") + return normalize_values(self, info["values"], dtype, (self._size,), info["is_iso"]) + return self.to_coo(dtype, indices=False)[1] + + if dtype is None and not self.dtype._is_udt: + # dtype of fill_value can upcast the dtype + if type(fill_value) is not Scalar: + try: + fill_value = Scalar.from_value(fill_value, is_cscalar=None, name="") + except TypeError: + fill_value = self._expect_type( + fill_value, + Scalar, + within="to_dense", + keyword_name="fill_value", + extra_message="Literal scalars also accepted.", + ) + dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) + + rv = self.dup(dtype, clear=True, name="to_dense", **opts) + rv(**opts) << fill_value + rv(self.S, **opts) << self + return rv.to_dense(**opts) + @property def _carg(self): return self.gb_obj[0] @@ -1307,7 +1475,7 @@ def select(self, op, thunk=None): None, [self, mask, _select_mask, (self, mask)], # [*expr_args, func, args] expr_repr="{0.name}.select({1.name})", - size=self.size, + size=self._size, dtype=self.dtype, ) @@ -1642,24 +1810,13 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # v(m)[I] << [1, 2, 3] # v[I](m) << [1, 2, 3] try: - # Do a copy for suitesparse so we can give ownership to suitesparse - values, dtype = values_to_numpy_buffer( - value, dtype, copy=backend == "suitesparse" - ) + values, dtype = values_to_numpy_buffer(value, dtype) except Exception: extra_message = "Literal scalars and lists also accepted." 
else: shape = values.shape try: - if backend == "suitesparse": - vals = Vector.ss.import_full( - values, dtype=dtype, take_ownership=True - ) - else: - # TODO: GraphBLAS needs a way to import or assign dense - vals = Vector.from_coo( - np.arange(shape[0]), values, dtype, size=shape[0] - ) + vals = Vector.from_dense(values, dtype=dtype) except Exception: # pragma: no cover (safety) vals = None else: @@ -1772,6 +1929,7 @@ def from_dict(cls, d, dtype=None, *, size=None, name=None): See Also -------- from_coo + from_dense from_pairs to_dict @@ -1799,6 +1957,7 @@ def to_dict(self): See Also -------- to_coo + to_dense from_dict Returns @@ -1890,7 +2049,7 @@ def shape(self): @wrapdoc(Vector.dup) def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): if clear: - return Vector(self.dtype if dtype is None else dtype, self.size, name=name) + return Vector(self.dtype if dtype is None else dtype, self._size, name=name) return self._new(dtype, mask, name) # Begin auto-generated code: Vector @@ -1931,6 +2090,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) @@ -2015,6 +2175,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) diff --git a/graphblas/dtypes.py b/graphblas/dtypes.py 
index e864a412f..2f8b40e43 100644 --- a/graphblas/dtypes.py +++ b/graphblas/dtypes.py @@ -130,7 +130,8 @@ def register_anonymous(dtype, name=None): np_repr = np_repr[: _lib.GxB_MAX_NAME_LEN] _warnings.warn( f"{msg}. It will use the following name, " - f"and the dtype may need to be specified when deserializing: {np_repr}" + f"and the dtype may need to be specified when deserializing: {np_repr}", + stacklevel=2, ) status = _lib.GxB_Type_new(gb_obj, dtype.itemsize, np_repr, _NULL) else: diff --git a/graphblas/io.py b/graphblas/io.py index 5111371ea..0aeb174e8 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -4,6 +4,7 @@ from . import backend as _backend from .core.matrix import Matrix as _Matrix +from .core.utils import normalize_values as _normalize_values from .core.utils import output_type as _output_type from .core.vector import Vector as _Vector from .dtypes import lookup_dtype as _lookup_dtype @@ -57,10 +58,14 @@ def from_networkx(G, nodelist=None, dtype=None, weight="weight", name=None): return from_scipy_sparse(A, name=name) -# TODO: add parameter to indicate empty value (default is 0 and NaN) -def from_numpy(m): +def from_numpy(m): # pragma: no cover (deprecated) """Create a sparse Vector or Matrix from a dense numpy array. + .. deprecated:: 2023.2.0 + `from_numpy` will be removed in a future release. + Use `Vector.from_dense` or `Matrix.from_dense` instead. + Will be removed in version 2023.10.0 or later + A value of 0 is considered as "missing". 
- m.ndim == 1 returns a `Vector` @@ -74,10 +79,21 @@ def from_numpy(m): m : np.ndarray Input array + See Also + -------- + Matrix.from_dense + Vector.from_dense + from_scipy_sparse + Returns ------- Vector or Matrix """ + _warn( + "`graphblas.io.from_numpy` is deprecated; " + "use `Matrix.from_dense` and `Vector.from_dense` instead.", + DeprecationWarning, + ) if m.ndim > 2: raise _GraphblasException("m.ndim must be <= 2") @@ -303,9 +319,14 @@ def to_networkx(m, edge_attribute="weight"): return G -def to_numpy(m): +def to_numpy(m): # pragma: no cover (deprecated) """Create a dense numpy array from a sparse Vector or Matrix. + .. deprecated:: 2023.2.0 + `to_numpy` will be removed in a future release. + Use `Vector.to_dense` or `Matrix.to_dense` instead. + Will be removed in version 2023.10.0 or later + Missing values will become 0 in the output. numpy dtype will match the GraphBLAS dtype @@ -315,10 +336,21 @@ def to_numpy(m): m : Vector or Matrix GraphBLAS Vector or Matrix + See Also + -------- + to_scipy_sparse + Matrix.to_dense + Vector.to_dense + Returns ------- np.ndarray """ + _warn( + "`graphblas.io.to_numpy` is deprecated; " + "use `Matrix.to_dense` and `Vector.to_dense` instead.", + DeprecationWarning, + ) try: import scipy # noqa: F401 except ImportError: # pragma: no cover (import) @@ -391,13 +423,10 @@ def to_scipy_sparse(A, format="csr"): info["col_indices"] = info["row_indices"] else: info = A.ss.export(format, sort=True) - if info["is_iso"]: - info["values"] = _np.broadcast_to(info["values"], A._nvals) + values = _normalize_values(A, info["values"], None, (A._nvals,), info["is_iso"]) if format == "csr": - return ss.csr_array( - (info["values"], info["col_indices"], info["indptr"]), shape=A.shape - ) - return ss.csc_array((info["values"], info["row_indices"], info["indptr"]), shape=A.shape) + return ss.csr_array((values, info["col_indices"], info["indptr"]), shape=A.shape) + return ss.csc_array((values, info["row_indices"], info["indptr"]), 
shape=A.shape) elif format == "csr": indptr, cols, vals = A.to_csr() return ss.csr_array((vals, cols, indptr), shape=A.shape) @@ -603,11 +632,7 @@ def mmread(source, *, dup_op=None, name=None): return _Matrix.from_coo( array.row, array.col, array.data, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name ) - if _backend == "suitesparse": - return _Matrix.ss.import_fullr(values=array, take_ownership=True, name=name) - rv = _Matrix(array.dtype, *array.shape, name=name) - rv[...] = array - return rv + return _Matrix.from_dense(array, name=name) def mmwrite(target, matrix, *, comment="", field=None, precision=None, symmetry=None): diff --git a/graphblas/op/__init__.py b/graphblas/op/__init__.py index b02b5228c..af05cbef4 100644 --- a/graphblas/op/__init__.py +++ b/graphblas/op/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.op.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/semiring/__init__.py b/graphblas/semiring/__init__.py index be181d405..904ae192f 100644 --- a/graphblas/semiring/__init__.py +++ b/graphblas/semiring/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. 
" f"`gb.semiring.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index eb743daaa..0f78430c1 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -4,7 +4,7 @@ import pytest import graphblas as gb -from graphblas import Matrix, dtypes +from graphblas import Matrix, Vector, dtypes from graphblas.exceptions import GraphblasException try: @@ -34,12 +34,25 @@ suitesparse = gb.backend == "suitesparse" +@pytest.mark.skipif("not ss") +def test_deprecated(): + a = np.array([0.0, 2.0, 4.1]) + with pytest.warns(DeprecationWarning): + v = gb.io.from_numpy(a) + assert v.isequal(gb.Vector.from_coo([1, 2], [2.0, 4.1]), check_dtype=True) + with pytest.warns(DeprecationWarning): + a2 = gb.io.to_numpy(v) + np.testing.assert_array_equal(a, a2) + with pytest.warns(DeprecationWarning): + gb.io.to_scipy_sparse_matrix(v, "coo") + + @pytest.mark.skipif("not ss") def test_vector_to_from_numpy(): a = np.array([0.0, 2.0, 4.1]) - v = gb.io.from_numpy(a) + v = Vector.from_dense(a, 0) assert v.isequal(gb.Vector.from_coo([1, 2], [2.0, 4.1]), check_dtype=True) - a2 = gb.io.to_numpy(v) + a2 = v.to_dense(0) np.testing.assert_array_equal(a, a2) csr = gb.io.to_scipy_sparse(v, "csr") @@ -59,17 +72,14 @@ def test_vector_to_from_numpy(): assert coo.nnz == 2 np.testing.assert_array_equal(coo.toarray(), np.array([[0.0, 2.0, 4.1]])) - with pytest.warns(DeprecationWarning): - coo = gb.io.to_scipy_sparse_matrix(v, "coo") - @pytest.mark.skipif("not ss") @pytest.mark.parametrize("a", [np.array([7, 0]), np.array([0, 0]), np.array([])]) def test_vector_to_from_numpy_correct_size(a): # Make sure we use the right size - v = gb.io.from_numpy(a) + v = Vector.from_dense(a, 0) assert v.shape == a.shape - b = gb.io.to_numpy(v) + b = v.to_dense(0) np.testing.assert_array_equal(a, b) csr = gb.io.to_scipy_sparse(v, "csr") 
np.testing.assert_array_equal(a[None, :], csr.toarray()) @@ -80,9 +90,9 @@ def test_vector_to_from_numpy_correct_size(a): @pytest.mark.skipif("not ss") def test_matrix_to_from_numpy(): a = np.array([[1.0, 0.0], [2.0, 3.7]]) - M = gb.io.from_numpy(a) + M = Matrix.from_dense(a, 0) assert M.isequal(gb.Matrix.from_coo([0, 1, 1], [0, 0, 1], [1.0, 2.0, 3.7]), check_dtype=True) - a2 = gb.io.to_numpy(M) + a2 = M.to_dense(0) np.testing.assert_array_equal(a, a2) for format in ["csr", "csc", "coo"]: @@ -96,9 +106,6 @@ def test_matrix_to_from_numpy(): with pytest.raises(ValueError, match="Invalid format"): gb.io.to_scipy_sparse(M, "bad format") - with pytest.raises(GraphblasException, match="ndim must be"): - gb.io.from_numpy(np.array([[[1.0, 0.0], [2.0, 3.7]]])) - @pytest.mark.skipif("not nx or not ss") def test_matrix_to_from_networkx(): diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index c10a76793..92d3fad13 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2881,7 +2881,6 @@ def test_expr_is_like_matrix(A): "_deserialize", "_extract_element", "_from_csx", - "_from_dense", "_from_obj", "_name_counter", "_parent", @@ -2896,8 +2895,10 @@ def test_expr_is_like_matrix(A): "from_csr", "from_dcsc", "from_dcsr", + "from_dense", "from_dicts", "from_edgelist", + "from_scalar", "from_values", "resize", "update", @@ -2944,7 +2945,6 @@ def test_index_expr_is_like_matrix(A): "_deserialize", "_extract_element", "_from_csx", - "_from_dense", "_from_obj", "_name_counter", "_parent", @@ -2959,9 +2959,11 @@ def test_index_expr_is_like_matrix(A): "from_csr", "from_dcsc", "from_dcsr", + "from_dense", "from_dicts", "from_edgelist", "from_values", + "from_scalar", "resize", } assert attrs - expr_attrs == expected, ( @@ -4107,6 +4109,56 @@ def test_to_from_edgelist(A): Matrix.from_edgelist([[0, 1, 10], [2, 3, 20]], values=0) +def test_from_scalar(): + A = Matrix.from_scalar(1, nrows=2, ncols=3) + B = Matrix(int, nrows=2, ncols=3) + 
B << 1 + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(dtype=float), [[1.0, 1, 1], [1, 1, 1]]) + A = Matrix.from_scalar(Scalar.from_value(1), nrows=2, ncols=3) + assert A.isequal(B, check_dtype=True) + A = Matrix.from_scalar(Scalar.from_value(1.0), 2, 3, int) + assert A.isequal(B, check_dtype=True) + with pytest.raises(TypeError, match="missing"): + Matrix.from_scalar(1, nrows=2) + with pytest.raises(TypeError, match="Literal scalars also accepted"): + Matrix.from_scalar(A, nrows=2, ncols=3) + A = Matrix.from_scalar(1, dtype="INT64[2]", nrows=3, ncols=4) + B = Matrix("INT64[2]", nrows=3, ncols=4) + B << [1, 1] + assert A.isequal(B, check_dtype=True) + + +def test_to_dense_from_dense(): + A = Matrix.from_dense(np.arange(6).reshape(2, 3)) + B = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], np.arange(6)) + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(dtype=int), [[0, 1, 2], [3, 4, 5]]) + assert_array_equal(A.T.to_dense(dtype=int), [[0, 3], [1, 4], [2, 5]]) + del A[0, 0] + assert_array_equal(A.to_dense(6.5), [[6.5, 1, 2], [3, 4, 5]]) + assert_array_equal(A.to_dense(6.5, int), [[6, 1, 2], [3, 4, 5]]) + assert_array_equal(A.to_dense(Scalar.from_value(6.5)), [[6.5, 1, 2], [3, 4, 5]]) + + A = Matrix.from_dense(np.arange(6).reshape(2, 3)) + A.resize(3, 4) + B.resize(3, 4) + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(10), [[0, 1, 2, 10], [3, 4, 5, 10], [10, 10, 10, 10]]) + with pytest.raises(ValueError, match="is required to create a dense"): + Matrix.from_dense([1, 2, 3]) + with pytest.raises(TypeError, match="fill_value must be given"): + A.to_dense() + with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): + A.to_dense(object()) + with pytest.raises(ValueError, match="must be 2d"): + Matrix.from_dense(np.arange(24).reshape(2, 3, 4), dtype=int) + with pytest.raises(ValueError, match=">2d array"): + Matrix.from_dense(np.arange(6).reshape(2, 3), dtype="INT64[2]") 
+ with pytest.raises(TypeError, match="from_scalar"): + Matrix.from_dense(1) + + @pytest.mark.skipif("not suitesparse") def test_ss_sort(A): A[3, 0] = 9 @@ -4263,6 +4315,10 @@ def test_subarray_dtypes(): Full2 = Matrix("INT64[4]", nrows=2, ncols=3) Full2[:, :] = b2 assert Full1.isequal(Full2, check_dtype=True) + Full2 = Matrix.from_dense(b2) + assert Full1.isequal(Full2, check_dtype=True) + Full2 = Matrix.from_dense(Full1.to_dense()) + assert Full2.isequal(Full2, check_dtype=True) if suitesparse: Full2 = Matrix.ss.import_fullr(b2) assert Full1.isequal(Full2, check_dtype=True) diff --git a/graphblas/tests/test_prefix_scan.py b/graphblas/tests/test_prefix_scan.py index 83742c0e0..ea169a632 100644 --- a/graphblas/tests/test_prefix_scan.py +++ b/graphblas/tests/test_prefix_scan.py @@ -1,21 +1,14 @@ import numpy as np import pytest -import graphblas as gb from graphblas import backend, binary, monoid from graphblas import Matrix, Vector # isort:skip (for dask-graphblas) -try: - # gb.io.to_numpy currently requires scipy - import scipy.sparse as ss -except ImportError: # pragma: no cover (import) - ss = None - suitesparse = backend == "suitesparse" -@pytest.mark.skipif("not ss or not suitesparse") +@pytest.mark.skipif("not suitesparse") @pytest.mark.parametrize("method", ["scan_rowwise", "scan_columnwise"]) @pytest.mark.parametrize("length", list(range(34))) @pytest.mark.parametrize("do_random", [False, True]) @@ -38,7 +31,7 @@ def test_scan_matrix(method, length, do_random): M = M.T.new(name="A") R = M.ss.scan(binary.plus, order="col").T.new() - result = gb.io.to_numpy(R) + result = R.to_dense(0) try: np.testing.assert_array_equal(result, expected) except Exception: # pragma: no cover (debug) @@ -46,7 +39,7 @@ def test_scan_matrix(method, length, do_random): raise -@pytest.mark.skipif("not ss or not suitesparse") +@pytest.mark.skipif("not suitesparse") @pytest.mark.parametrize("length", list(range(34))) @pytest.mark.parametrize("do_random", [False, True]) def 
test_scan_vector(length, do_random): @@ -62,7 +55,7 @@ def test_scan_vector(length, do_random): v = Vector.ss.import_full(values=a) expected = a.cumsum() r = v.ss.scan() - result = gb.io.to_numpy(r) + result = r.to_dense(0) try: np.testing.assert_array_equal(result, expected) except Exception: # pragma: no cover (debug) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 70f1d65c7..f373e39a9 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -1616,8 +1616,10 @@ def test_expr_is_like_vector(v): "build", "clear", "from_coo", + "from_dense", "from_dict", "from_pairs", + "from_scalar", "from_values", "resize", "update", @@ -1663,8 +1665,10 @@ def test_index_expr_is_like_vector(v): "build", "clear", "from_coo", + "from_dense", "from_dict", "from_pairs", + "from_scalar", "from_values", "resize", } @@ -2486,6 +2490,52 @@ def test_from_pairs(): Vector.from_pairs([[1, 2, 3], [4, 5, 6]]) +def test_from_scalar(): + v = Vector.from_scalar(1, size=3) + w = Vector.from_coo([0, 1, 2], 1) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(), [1, 1, 1]) + v = Vector.from_scalar(Scalar.from_value(1), size=3) + assert v.isequal(w, check_dtype=True) + v = Vector.from_scalar(Scalar.from_value(1.0), 3, int) + with pytest.raises(TypeError, match="missing"): + Vector.from_scalar(1) + with pytest.raises(TypeError, match="Literal scalars also accepted"): + Vector.from_scalar(v, size=2) + v = Vector.from_scalar(1, dtype="INT64[2]", size=3) + w = Vector("INT64[2]", size=3) + w << [1, 1] + assert v.isequal(w, check_dtype=True) + + +def test_to_dense_from_dense(): + v = Vector.from_dense([1, 2, 3]) + w = Vector.from_coo([0, 1, 2], [1, 2, 3]) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(dtype=int), [1, 2, 3]) + v = Vector.from_dense([1, 2, 3]) + v.resize(4) + w = Vector.from_coo([0, 1, 2], [1, 2, 3], size=4) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(4.5, 
dtype=float), [1, 2, 3, 4.5]) + assert_array_equal(v.to_dense(4.5), [1, 2, 3, 4.5]) # Scalar type can upcast + assert_array_equal(v.to_dense(Scalar.from_value(4)), [1, 2, 3, 4]) + with pytest.raises(TypeError, match="fill_value must be given"): + v.to_dense() + with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): + v.to_dense(object()) + v = Vector.from_dense([1, 2]) + w = Vector.from_coo([0, 1], [1, 2], size=2) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(dtype=float), [1.0, 2]) + with pytest.raises(ValueError, match="must be 1d"): + Vector.from_dense(np.arange(6).reshape(2, 3), dtype=int) + with pytest.raises(ValueError, match=">1d array"): + Vector.from_dense(np.arange(6), dtype="INT64[2]") + with pytest.raises(TypeError, match="from_scalar"): + Vector.from_dense(1) + + @pytest.mark.skipif("not suitesparse") def test_ss_sort(v): # For equal values, indices are guaranteed to be sorted @@ -2535,6 +2585,15 @@ def test_subarray_dtypes(): w = Vector.from_pairs([[1, [0, 1, 2, 3]], [3, [4, 5, 6, 7]], [5, [8, 9, 10, 11]]]) assert v.isequal(w, check_dtype=True) + filled1 = Vector.from_dense(v.to_dense(0)) + filled2 = v.dup() + filled2[[0, 2, 4]] = 0 + assert filled1.isequal(filled2, check_dtype=True) + filled1 = Vector.from_dense(v.to_dense([6, 5, 4, 3])) + filled2 = v.dup() + filled2[[0, 2, 4]] = [6, 5, 4, 3] + assert filled1.isequal(filled2, check_dtype=True) + full1 = Vector.from_coo([0, 1, 2], a) full2 = Vector("INT64[4]", size=3) full2[0] = [0, 1, 2, 3] @@ -2544,6 +2603,10 @@ def test_subarray_dtypes(): full2 = Vector("INT64[4]", size=3) full2[:] = a assert full1.isequal(full2, check_dtype=True) + full2 = Vector.from_dense(a) + assert full1.isequal(full2, check_dtype=True) + full2 = Vector.from_dense(full1.to_dense()) + assert full1.isequal(full2, check_dtype=True) if suitesparse: w = Vector.ss.import_sparse(indices=[1, 3, 5], values=a, size=6) assert v.isequal(w, check_dtype=True) diff --git 
a/graphblas/unary/__init__.py b/graphblas/unary/__init__.py index 92e508df3..a503b5141 100644 --- a/graphblas/unary/__init__.py +++ b/graphblas/unary/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.unary.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/pyproject.toml b/pyproject.toml index d23d2079a..f0cef8e0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -189,6 +189,7 @@ select = [ "C4", # flake8-comprehensions "DTZ", # flake8-datetimez "T10", # flake8-debugger + # "DJ", # flake8-django # "EM", # flake8-errmsg "EXE", # flake8-executable "ISC", # flake8-implicit-str-concat @@ -197,6 +198,7 @@ select = [ # "INP", # flake8-no-pep420 "PIE", # flake8-pie "T20", # flake8-print + # "PYI", # flake8-pyi "PT", # flake8-pytest-style "Q", # flake8-quotes # "RET", # flake8-return @@ -216,6 +218,7 @@ select = [ "TRY", # tryceratops # "RSE", # flake8-raise # "SLF", # flake8-self + "NPY", # NumPy-specific rules "RUF", # ruff-specific rules ] external = [ @@ -244,6 +247,8 @@ ignore = [ # Intentionally ignored "COM812", # Trailing comma missing "D203", # 1 blank line required before class docstring (Note: conflicts with D211, which is preferred) + "PLR0911", # Too many return statements + "PLR0912", # Too many branches "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable @@ -251,6 +256,7 @@ ignore = [ "PT003", # `scope='function'` is implied in `@pytest.fixture()` (Note: no harm in being explicit) "PT023", # Use `@pytest.mark.slow()` over `@pytest.mark.slow` (Note: why?) 
"S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) + "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) "SIM102", # Use a single `if` statement instead of nested `if` statements (Note: often necessary) "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index f1fb8246e..14f39f18c 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -3,7 +3,7 @@ # Use, adjust, copy/paste, etc. as necessary to answer your questions. # This may be helpful when updating dependency versions in CI. # Tip: add `--json` for more information. -conda search 'numpy[channel=conda-forge]>=1.24.1' +conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.0' conda search 'networkx[channel=conda-forge]>=3.0' @@ -11,5 +11,5 @@ conda search 'awkward[channel=conda-forge]>=2.0.7' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1' -conda search 'flake8-bugbear[channel=conda-forge]>=23.1.20' +conda search 'flake8-bugbear[channel=conda-forge]>=23.2.13' conda search 'flake8-simplify[channel=conda-forge]>=0.19.3'