+"""
+This module defines the base tensor class along with all of its essential
+attributes and special methods. Public math methods, e.g. ``sum``, ``mean``,
+etc., are bound to the Tensor class in ``mygrad.__init__.py``.
+"""
+
+from collections import deque
+from numbers import Integral, Number
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Deque,
+ Dict,
+ Iterator,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+)
+from weakref import ReferenceType, finalize
+
+import numpy as np
+
+import mygrad._utils.duplicating_graph as _dup
+import mygrad._utils.graph_tracking as _track
+import mygrad._utils.lock_management as _mem
+from mygrad._numpy_version import NP_IS_V2
+from mygrad._tensor_core_ops.indexing import GetItem, SetItem
+from mygrad._utils import WeakRef, WeakRefIterable, collect_all_tensors_and_clear_grads
+from mygrad.errors import DisconnectedView
+from mygrad.math.arithmetic.ops import (
+ Add,
+ Divide,
+ Multiply,
+ Negative,
+ Positive,
+ Power,
+ Square,
+ Subtract,
+)
+from mygrad.math.misc.ops import MatMul
+from mygrad.math.sequential.ops import (
+ CumProd,
+ CumSum,
+ Max,
+ Mean,
+ Min,
+ Prod,
+ StdDev,
+ Sum,
+ Variance,
+)
+from mygrad.operation_base import Operation, _NoValue
+from mygrad.tensor_manip.array_shape.ops import Flatten, Ravel, Reshape, Squeeze
+from mygrad.tensor_manip.transpose_like.ops import (
+ MoveAxis,
+ SwapAxes,
+ Tensor_Transpose_Property,
+ Transpose,
+)
+from mygrad.typing import ArrayLike, DTypeLike, DTypeLikeReals, Index, Shape
+
+__all__ = ["Tensor", "asarray", "astensor", "implements_numpy_override"]
+
+if TYPE_CHECKING: # pragma: no cover
+ from mygrad.ufuncs._ufunc_creators import ufunc as mygrad_ufunc
+
+
+T = TypeVar("T")
+
+CONSTANT_ONLY_DTYPES = (np.integer, np.bool_)
+
+
+def _resolve_constant(*others: Any, constant: Optional[bool]) -> Optional[bool]:
+ """Determines if `constant` should be resolved to True based on `others`.
+ Otherwise defers to a tensor-creator to handle further resolutions based on dtype.
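+
+    Illustrative resolutions (a sketch; note that ``Tensor(1.0)`` is non-constant
+    by default)::
+
+        _resolve_constant(2, 3.0, constant=None)       -> True  (no variable tensors)
+        _resolve_constant(Tensor(1.0), constant=None)  -> None  (defer to dtype inference)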
+ """
+ if constant is not None:
+ return constant
+ for other in others:
+ if isinstance(other, Tensor) and not other.constant:
+ # let subsequent tensor casting infer constant from dtype
+ return None
+ # all inputs are constants
+ return True
+
+
+def asarray(a: ArrayLike, dtype: DTypeLike = None, order: str = None) -> np.ndarray:
+    """Convert the input to an array.
+
+    This docstring is adapted from that of ``numpy.asarray``
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form - including a mygrad tensor - that can be
+        converted to an array. This includes lists, lists of tuples, tuples,
+        tuples of tuples, tuples of lists and ndarrays.
+
+    dtype : data-type, optional
+        By default, the data-type is inferred from the input data.
+
+    order : {'C', 'F'}, optional
+        Whether to use row-major (C-style) or
+        column-major (Fortran-style) memory representation.
+        Defaults to 'C'.
+
+    Returns
+    -------
+    out : ndarray
+        Array interpretation of `a`. No copy is performed if the input
+        is already an ndarray with matching dtype and order. If `a` is a
+        subclass of ndarray, a base class ndarray is returned.
+
+    Examples
+    --------
+    Convert a list into an array:
+
+    >>> import mygrad as mg
+    >>> import numpy as np
+    >>> a = [1, 2]
+    >>> mg.asarray(a)
+    array([1, 2])
+
+    Convert a tensor into an array. No copy of the
+    underlying numpy array is created:
+
+    >>> t = mg.Tensor([1, 2.])
+    >>> mg.asarray(t)
+    array([1., 2.])
+    >>> t.data is np.asarray(t)
+    True
+
+    Existing arrays are not copied:
+
+    >>> a = np.array([1, 2])
+    >>> mg.asarray(a) is a
+    True
+
+    If `dtype` is set, array is copied only if dtype does not match:
+
+    >>> a = np.array([1, 2], dtype=np.float32)
+    >>> mg.asarray(a, dtype=np.float32) is a
+    True
+    >>> mg.asarray(a, dtype=np.float64) is a
+    False
+
+    Contrary to `asanyarray`, ndarray subclasses are not passed through:
+
+    >>> issubclass(np.recarray, np.ndarray)
+    True
+    >>> a = np.array([(1.0, 2), (3.0, 4)], dtype='f4,i4').view(np.recarray)
+    >>> mg.asarray(a) is a
+    False
+    >>> np.asanyarray(a) is a
+    True
+    """
+    if isinstance(a, Tensor):
+        a = a.data  # faster than passing the tensor directly
+    return np.asarray(a, dtype=dtype, order=order)
+
+
+def tensor(
+    arr_like: ArrayLike,
+    dtype: DTypeLikeReals = None,
+    *,
+    constant: Optional[bool] = None,
+    copy: bool = True,
+    ndmin: int = 0,
+) -> "Tensor":
+    """
+    Create a tensor
+
+    This documentation was adapted from that of ``numpy.array``
+
+    Parameters
+    ----------
+    arr_like : array_like
+        A tensor, any object exposing the array interface, an object whose
+        __array__ method returns a tensor, a real number, or any (nested) sequence.
+
+    dtype : data-type, optional
+        The desired data-type for the tensor. Restricted to integer and float types.
+        If not specified, then the type will be determined as the minimum type required
+        to hold the objects in the sequence.
+
+    constant : Optional[bool]
+        If ``True``, this tensor is treated as a constant, and thus does not
+        facilitate back propagation (i.e. ``constant_tensor.grad`` will always
+        return ``None``).
+
+        If a new tensor is returned:
+
+        - Defaults to ``False`` for float-type data.
+        - Defaults to ``True`` for integer-type data.
+
+    copy : bool, optional
+        If true (default), or if a copy is needed to satisfy any of the
+        other requirements (``dtype``, ``constant``, etc.), then a new tensor
+        is created from copied data. Otherwise the tensor will be returned
+        unchanged.
+
+    ndmin : int, optional
+        Specifies the minimum number of dimensions that the resulting
+        tensor should have. Ones will be prepended to the shape as
+        needed to meet this requirement.
+
+    Returns
+    -------
+    out : Tensor
+        A tensor satisfying the specified requirements.
+
+    See Also
+    --------
+    empty_like : Return an empty tensor with shape and type of input.
+    ones_like : Return a tensor of ones with shape and type of input.
+    zeros_like : Return a tensor of zeros with shape and type of input.
+    full_like : Return a new tensor with shape of input filled with value.
+    empty : Return a new uninitialized tensor.
+    ones : Return a new tensor setting values to one.
+    zeros : Return a new tensor setting values to zero.
+    full : Return a new tensor of given shape filled with value.
+
+    Examples
+    --------
+    >>> import mygrad as mg
+    >>> mg.tensor([1, 2, 3])
+    Tensor([1, 2, 3])
+
+    Upcasting:
+
+    >>> mg.tensor([1, 2, 3.0])
+    Tensor([1., 2., 3.])
+
+    More than one dimension:
+
+    >>> mg.tensor([[1, 2], [3, 4]])
+    Tensor([[1, 2],
+            [3, 4]])
+
+    Minimum dimensions 2:
+
+    >>> mg.tensor([1, 2, 3], ndmin=2)
+    Tensor([[1, 2, 3]])
+
+    Type provided:
+
+    >>> mg.tensor([1, 2, 3], dtype="float32")
+    Tensor([1., 2., 3.], dtype=float32)
+    """
+    if isinstance(arr_like, Tensor) and copy is False:
+        if (constant is None or arr_like.constant is constant) and (
+            dtype is None or (arr_like.dtype == np.dtype(dtype))
+        ):
+            if not isinstance(ndmin, Integral):
+                raise TypeError(
+                    f"`ndmin` requires a non-negative integer (got type {type(ndmin)})"
+                )
+            if ndmin < 0:
+                ndmin = 0  # numpy does this
+            if ndmin > arr_like.ndim:
+                arr_like = arr_like[(*(None for _ in range(ndmin - arr_like.ndim)),)]
+            # return tensor as-is
+            return arr_like
+
+    return Tensor(arr_like, dtype=dtype, constant=constant, copy=copy, ndmin=ndmin)
+
+
+def astensor(
+    t: ArrayLike, dtype: DTypeLikeReals = None, *, constant: Optional[bool] = None
+) -> "Tensor":
+    """Convert the input to a tensor.
+
+    A tensor `t` is returned unchanged - its gradient and computational
+    graph state preserved - if dtype and constant are compatible.
+    A copy of the underlying numpy array is created only if dtype is
+    incompatible or if a non-constant tensor is being created from a constant.
+
+    Parameters
+    ----------
+    t : array_like
+        Input data, in any form that can be converted to a tensor. This
+        includes lists, lists of tuples, tuples, tuples of tuples, tuples
+        of lists and ndarrays.
+
+    dtype : data-type, optional
+        By default, the data-type is inferred from the input data.
+
+    constant : Optional[bool]
+        By default, `constant` is inferred from `t` if `t` is a tensor;
+        otherwise it is inferred from the data's type:
+
+        - Defaults to ``False`` for float-type data.
+        - Defaults to ``True`` for integer-type data.
+
+        Integer-type tensors must be constant.
+
+    Returns
+    -------
+    out : Tensor
+        Tensor interpretation of `t`. No copy is performed if the input
+        is already a tensor with matching dtype and constant-flag.
+
+    Examples
+    --------
+    Convert a list into a tensor:
+
+    >>> import mygrad as mg
+    >>> import numpy as np
+    >>> t = [1, 2]
+    >>> mg.astensor(t)
+    Tensor([1, 2])
+
+    Convert an array into a tensor. No copy of the
+    underlying numpy array is created:
+
+    >>> a = np.array([1.0, 2.0])
+    >>> mg.astensor(a)
+    Tensor([1., 2.])
+    >>> a is mg.astensor(a).data
+    True
+
+    Existing tensors are not copied and their gradients and
+    computational graphs are preserved:
+
+    >>> t1 = 2 * mg.tensor([1, 2])
+    >>> t2 = mg.astensor(t1)
+    >>> t1 is t2
+    True
+    >>> t1.creator is t2.creator
+    True
+
+    If `dtype` is set, a new tensor is created - with copied data - only
+    if dtype does not match:
+
+    >>> t = mg.Tensor([1, 2], dtype=np.float32)
+    >>> mg.astensor(t, dtype=np.float32) is t
+    True
+    >>> mg.astensor(t, dtype=np.float64) is t
+    False
+
+    Otherwise, if `constant` is set, a new tensor is created (with
+    no copy of the underlying data) only if constant doesn't match.
+
+    >>> t1 = mg.tensor([1.0, 2.0], constant=False)
+    >>> mg.astensor(t1, constant=False) is t1
+    True
+    >>> mg.astensor(t1, constant=True) is t1
+    False
+    >>> mg.astensor(t1, constant=True).data is t1.data
+    True
+    """
+    return tensor(t, dtype=dtype, constant=constant, copy=False, ndmin=0)
+
+
+_REGISTERED_UFUNC: Dict[np.ufunc, Type["mygrad_ufunc"]] = {}
+_REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS: Dict[
+ Callable[..., np.ndarray], Callable[..., "Tensor"]
+] = {}
+
+_REGISTERED_BOOL_ONLY_UFUNC: Set[np.ufunc] = {
+ np.isnan,
+ np.isfinite,
+ np.isinf,
+ np.isnat,
+ np.signbit,
+ np.logical_not,
+ np.logical_and,
+ np.logical_or,
+ np.logical_xor,
+ np.greater,
+ np.greater_equal,
+ np.less,
+ np.less_equal,
+ np.equal,
+ np.not_equal,
+}
+
+# These are ufuncs that users might mistake for being differentiable functions;
+# for this reason we make explicit the fact that only constant tensors are permitted
+# in these operations.
+_REGISTERED_CONST_ONLY_UFUNC = {
+ np.floor_divide,
+ np.remainder,
+ np.mod,
+ np.fmod,
+ np.divmod,
+ np.rint,
+ np.sign,
+ np.floor,
+ np.ceil,
+ np.trunc,
+}
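+# E.g. (illustrative): ``np.floor(mg.tensor([1.5]))`` raises a ``ValueError``, whereas
+# ``np.floor(mg.tensor([1.5], constant=True))`` returns a plain ndarray: ``array([1.])``.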
+
+
+_REGISTERED_NO_DIFF_NUMPY_FUNCS: Set[Callable] = {
+ np.allclose,
+ np.bincount,
+ np.can_cast,
+ np.copyto,
+ np.isclose,
+ np.may_share_memory,
+ np.min_scalar_type,
+ np.result_type,
+ np.shares_memory,
+ np.shape,
+}
+
+
+class implements_numpy_override:
+ """Registers a mygrad-based override for a NumPy function of the same name, via
+ the standard __array_function__ interface. [1]_
+
+ Examples
+ --------
+ >>> @implements_numpy_override() # np.reshape to be overridden
+ ... def reshape(x, shape):
+ ... # a mygrad-based implementation of numpy.reshape
+ ... print("hello world")
+
+ >>> import numpy as np
+ >>> import mygrad as mg
+ >>> np.reshape(mg.tensor(1.), 2)
+    hello world
+
+    You can also provide the numpy function explicitly:
+
+ >>> import numpy as np
+ >>> @implements_numpy_override(np.reshape) # np.reshape to be overridden
+ ... def some_function(x, shape):
+ ... pass
+
+ References
+ ----------
+ .. [1] https://numpy.org/devdocs/reference/arrays.classes.html?#numpy.class.__array_function__
+ """
+
+ __slots__ = ("numpy_func",)
+
+ def __init__(self, numpy_func: Optional[Callable] = None):
+ # if None, `numpy_func` is inferred from the name of the decorated function
+ self.numpy_func = numpy_func
+
+ def __call__(self, wrapped_func: T) -> T:
+ if self.numpy_func is None:
+ try:
+ self.numpy_func = getattr(np, wrapped_func.__name__)
+ except AttributeError:
+ raise AttributeError(
+ f"@implements_numpy_override tried to register an override for the function numpy.{wrapped_func.__name__}, but no "
+ f"such function exists."
+ )
+
+ _REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS[self.numpy_func] = wrapped_func
+ return wrapped_func
+
+
+class _ConstantOnly(ValueError):
+ pass
+
+
+def _as_constant_array(t: Union["Tensor", np.ndarray]) -> np.ndarray:
+ """Passes through all non-tensor objects and constant tensors. Raises on
+ non-constant tensors."""
+ if isinstance(t, Tensor):
+ if t.constant is False:
+ raise _ConstantOnly()
+ return t.data
+ return t
+
+
+class Tensor:
+ """A numpy-array-like object capable of serving as a node in a computational
+ graph that supports back-propagation of derivatives via the chain rule.
+ See the Examples section of the docstring for more details.
+
+ Like the numpy array, mygrad's tensor stores data as an N-dimensional array
+    and provides an interface for accessing, setting, and performing vectorized
+ operations along the various dimensions of this array. Vectorized operations
+ support numpy-style broadcasting semantics.
+
+ The contents of a tensor can be accessed and written to using all variety
+ of basic and advanced indexing (along with mixtures of the two).
+
+ Creating a Tensor
+ -----------------
+ ``mygrad.Tensor`` can be passed any "array-like" object of numerical data.
+ This includes numbers, sequences (e.g. lists), nested sequences, numpy-ndarrays,
+ and other mygrad-tensors. mygrad also provides familiar numpy-style tensor-creation
+ functions (e.g. ``mygrad.arange``, ``mygrad.linspace``, etc.)
+
+ >>> import mygrad as mg
+ >>> mg.tensor(2.3) # creating a 0-dimensional tensor
+ Tensor(2.3)
+ >>> mg.tensor(np.array([1.2, 3.0])) # casting a numpy-array to a tensor
+ Tensor([1.2, 3.0])
+ >>> mg.tensor([[1, 2], [3, 4]]) # creating a 2-dimensional tensor
+ Tensor([[1, 2],
+ [3, 4]])
+ >>> mg.arange(4) # using numpy-style tensor creation functions
+ Tensor([0, 1, 2, 3])
+
+ Creating a non-constant tensor will copy array data:
+
+ >>> import numpy as np
+ >>> arr = np.arange(10.)
+    >>> t_var = mg.tensor(arr, constant=False)
+ >>> np.shares_memory(arr, t_var)
+ False
+
+    Creating a constant tensor will not make a copy of the array data:
+
+ >>> t_const = mg.tensor(arr, constant=True)
+ >>> np.shares_memory(arr, t_const)
+ True
+
+ Forward and Back-Propagation
+ ----------------------------
+    Let's construct a computational graph consisting of two zero-dimensional
+    tensors, ``x`` and ``y``, which are used to compute an output tensor,
+    ``ℒ``. This is a "forward-pass imperative" style for creating a computational
+    graph - the graph is constructed as we carry out the forward-pass computation.
+
+ >>> x = mg.tensor(3.0)
+ >>> y = mg.tensor(2.0)
+ >>> ℒ = 2 * x + y ** 2
+
+    Invoking ``ℒ.backward()`` signals the computational graph to
+    compute the total-derivative of ``ℒ`` with respect to each one of its dependent
+    variables. I.e. ``x.grad`` will store ``dℒ/dx`` and ``y.grad`` will store
+    ``dℒ/dy``. Thus we have back-propagated a gradient from ``ℒ`` through our graph.
+
+    Each tensor of derivatives is computed elementwise. That is, if ``x = Tensor([x0, x1, x2])``,
+    then dℒ/dx represents ``[dℒ/d(x0), dℒ/d(x1), dℒ/d(x2)]``
+
+    >>> ℒ.backward()  # computes dℒ/dx and dℒ/dy
+    >>> x.grad  # dℒ/dx
+    array(6.0)
+    >>> y.grad  # dℒ/dy
+    array(4.0)
+    >>> ℒ.grad  # dℒ/dℒ
+    array(1.0)
+
+ Once the gradients are computed, the computational graph containing ``x``,
+ ``y``, and ``ℒ`` is cleared automatically. Additionally, involving any
+ of these tensors in a new computational graph will automatically null
+ their gradients.
+
+    >>> 2 * x
+    Tensor(6.0)
+ >>> x.grad is None
+ True
+
+ Or, you can use the ``tensor.null_grad()`` method to manually clear a
+ tensor's gradient
+
+ >>> y.null_grad()
+ Tensor(2.)
+ >>> y.grad is None
+ True
+
+ Accessing the Underlying NumPy Array
+ ------------------------------------
+ ``mygrad.Tensor`` is a thin wrapper on ``numpy.ndarray``. A tensor's
+ underlying numpy-array can be accessed via ``.data``:
+
+ >>> x = mg.tensor([1, 2])
+ >>> x.data
+ array([1, 2])
+
+ **Do not modify this underlying array**. Any in-place modifications made to this
+ array will not be tracked by any computational graph involving that tensor, thus
+ back-propagation through that tensor will likely be incorrect.
+
+ Producing a "View" of a Tensor
+ ------------------------------
+ MyGrad's tensors exhibit the same view semantics and memory-sharing relationships
+ as NumPy arrays. I.e. any (non-scalar) tensor produced via basic indexing will share
+ memory with its parent.
+
+ >>> x = mg.tensor([1., 2., 3., 4.])
+ >>> y = x[:2] # the view: Tensor([1., 2.])
+ >>> y.base is x
+ True
+ >>> np.shares_memory(x, y)
+ True
+
+ Mutating shared data will propagate through views:
+
+ >>> y *= -1
+ >>> x
+ Tensor([-1., -2., 3., 4.])
+ >>> y
+ Tensor([-1., -2.])
+
+ And this view relationship will also manifest between the tensors' gradients
+
+ >>> (x ** 2).backward()
+ >>> x.grad
+ array([-2., -4., 6., 8.])
+ >>> y.grad
+ array([-2., -4.])
+
+    In-Place Operations are not Efficient
+    -------------------------------------
+    It is important to note that, while MyGrad's view semantics promote a rich parity
+    with NumPy, certain patterns should be avoided in the interest of optimized performance.
+    Namely, performing in-place operations on tensors is generally not more efficient than
+    their non-mutating counterparts.
+
+ This is because MyGrad has to track the state of tensors that are involved in a computational
+ graph. Thus a mutated tensor must have its pre-augmented state stored for future reference; this
+    defeats the performance benefit of writing to an array's memory in-place. This is especially
+    inefficient if you are mutating a tensor involved with multiple views of the same memory.
+    (By contrast, producing a view of a tensor *is* efficient, as one would expect.)
+
+    Thus these NumPy-like in-place semantics are supported by MyGrad not for performance
+    purposes, but instead to support convenient and familiar code-patterns and to enable one to
+    port NumPy code to MyGrad (or, in the future, inject MyGrad tensors into NumPy!) and get
+    the exact same behavior.
+
+    A final note: MyGrad's in-place operations, when run under :func:`~mygrad.no_autodiff` mode,
+    do not incur the extra costs noted above, and thus do deliver the expected performance
+    benefits of in-place mutation.
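+
+    For example (a sketch, using the :func:`~mygrad.no_autodiff` context referenced above):
+
+    >>> x = mg.tensor([1., 2.])
+    >>> with mg.no_autodiff:
+    ...     x[0] = -1.  # no pre-mutation state needs to be recorded here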
+ """
+
+ __array_priority__ = 15.0
+
+ def __array_ufunc__(
+ self, ufunc: Type[np.ufunc], method: str, *inputs: ArrayLike, **kwargs
+ ) -> Union["Tensor", np.ndarray]:
+ """An interface provided by NumPy to override the behavior of its ufuncs [1]_.
+
+ MyGrad implements its own ufuncs for all differentiable NumPy ufuncs.
+
+ Non-differentiable numpy ufuncs simply get called on the underlying arrays of tensors and
+ will return ndarrays.
+
+ The differentiability - or lack thereof - of ufuncs may not be obvious to end users.
+ Thus potentially ambiguous ufuncs (e.g. `numpy.ceil`) will be made to raise on non-constant
+ tensors so that the lack of differentiability is made obvious to the users. This design decision
+ is made in the same spirit as requiring integer-dtype tensors be constant.
+
+ References
+ ----------
+ .. [1] https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
+
+ Examples
+ --------
+ NumPy ufuncs that represent differentiable operations are overloaded by MyGrad tensors
+ so that they support backprop
+
+ >>> import mygrad as mg
+ >>> import numpy as np
+
+ >>> x = mg.tensor([1., 2.])
+
+ This calls ``mygrad.sin`` under the hood.
+
+ >>> np.sin(x) # returns a tensor
+ Tensor([0.84147098, 0.90929743])
+
+ >>> np.sin(x).backward()
+ >>> x.grad # stores d(sin(x))/dx @ x = [1., 2.]
+ array([ 0.54030231, -0.41614684])
+
+        Specifying a dtype, a ``where`` mask, and an in-place target (via ``out``, as an
+        array or a tensor) are all supported.
+
+ >>> x = mg.tensor([1., 2.])
+ >>> y = mg.tensor([-1., -1.])
+ >>> np.exp(x, where=[False, True], out=y)
+ Tensor([-1. , 7.3890561])
+ >>> y.backward()
+ >>> x.grad
+ array([0. , 7.3890561])
+
+ Non-differentiable NumPy ufuncs simply operate on the ndarrays that are wrapped
+ by MyGrad tensors; these return ndarrays, which will appropriately and explicitly
+ serve as constants elsewhere in a computational graph.
+
+ >>> x = mg.tensor([1., 2.])
+ >>> np.less_equal(x, 1)
+ array([ True, False])
+ """
+ out = kwargs.pop("out", (None,))
+ if len(out) > 1: # pragma: no cover
+ raise ValueError(
+                "mygrad does not support in-place operations with more than one target"
+ )
+ (out,) = out
+
+ out: Optional[Union[np.ndarray, "Tensor"]]
+
+ try:
+ # differentiable ufunc implemented by mygrad
+ return getattr(_REGISTERED_UFUNC[ufunc], method)(*inputs, **kwargs, out=out)
+ except KeyError:
+ pass
+
+ # non-differentiable ufuncs get called on numpy arrays stored by tensors
+ if ufunc in _REGISTERED_BOOL_ONLY_UFUNC:
+ caster = asarray
+ elif ufunc in _REGISTERED_CONST_ONLY_UFUNC:
+ # the presence of non-constant tensors will raise
+ caster = _as_constant_array
+ else: # pragma: no cover
+ return NotImplemented
+
+ try:
+ if out is not None:
+ kwargs["out"] = caster(out)
+ # returns ndarray
+ return getattr(ufunc, method)(*(caster(t) for t in inputs), **kwargs)
+ except _ConstantOnly:
+ raise ValueError(
+ f"{repr(ufunc)} cannot involve non-constant mygrad tensors."
+ )
+
+ def __array_function__(
+ self, func: Callable[..., np.ndarray], types, args, kwargs
+ ) -> Union["Tensor", np.ndarray]:
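+        # NumPy's ``__array_function__`` protocol hook. Overrides registered via
+        # ``implements_numpy_override`` return tensors; functions in the no-diff
+        # registry are called directly on the tensors' underlying ndarrays.
+        # Illustrative: ``np.shape(mg.tensor([1., 2.]))`` returns the plain tuple ``(2,)``.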
+ if func in _REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS:
+ return _REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS[func](*args, **kwargs)
+ elif func in _REGISTERED_NO_DIFF_NUMPY_FUNCS:
+ return func(
+ *(t.data if isinstance(t, Tensor) else t for t in args),
+ **{
+ k: (v.data if isinstance(v, Tensor) else v)
+ for k, v in kwargs.items()
+ },
+ )
+ else: # pragma: no cover
+ return NotImplemented
+
+ def __array__(
+ self, dtype: DTypeLike = None, copy: Optional[bool] = None
+ ) -> np.ndarray:
+ if NP_IS_V2:
+ return np.asarray(self.data, dtype=dtype, copy=copy)
+ else: # pragma: no cover
+ if copy is None:
+ copy = False
+ return np.array(self.data, dtype=dtype, copy=copy)
+
+ def __init__(
+ self,
+ x: ArrayLike,
+ *,
+ dtype: DTypeLikeReals = None,
+ constant: Optional[bool] = None,
+ copy: bool = True,
+ ndmin: int = 0,
+ _creator: Optional[Operation] = None,
+ _base: Optional["Tensor"] = None,
+ ):
+ """
+ Parameters
+ ----------
+ x : ArrayLike
+ Input data, in any form that can be converted to an array. This
+ includes numbers, sequences, nested sequences, numpy-ndarrays,
+ and mygrad-tensors.
+
+ dtype : DTypeLikeReals
+ `int`, `float`, or a real-valued numpy data type. By default the
+ data type is inferred from ``x`` via ``numpy.asarray(x)``.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. `self.grad` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ copy : Optional[bool]
+ Determines if the incoming array-data will be copied.
+
+ ndmin : int, optional
+ Specifies the minimum number of dimensions that the resulting
+ array should have. Ones will be prepended to the shape as
+ needed to meet this requirement.
+
+ Notes
+ -----
+ The following are parameters reserved only for internal use:
+
+ _creator : Optional[mygrad.Operation]
+ The operation-instance whose forward pass produced `self`. Should not
+ be set manually by users.
+
+ _base : Optional[Tensor]
+ Points to the tensor that ``self`` shares memory with.
+ """
+
+ if constant is not None and not isinstance(constant, bool):
+ raise TypeError(f"`constant` must be a boolean value, got: {constant}")
+
+ self._creator: Optional[Operation] = _creator
+
+ if not NP_IS_V2: # pragma: no cover
+ self.data = np.array(x, dtype=dtype, copy=copy, ndmin=ndmin)
+ else:
+ if copy is False:
+ self.data = np.asarray(x, dtype=dtype)
+ if not isinstance(ndmin, Integral):
+ raise TypeError(
+ f"'{type(ndmin)}' object cannot be interpreted as an integer"
+ )
+ if ndmin and self.data.ndim < ndmin:
+ self.data = self.data[
+ (*(None for _ in range(ndmin - self.data.ndim)),)
+ ]
+ else:
+ self.data = np.array(x, dtype=dtype, copy=copy, ndmin=ndmin)
+
+ dtype = self.data.dtype.type
+ is_float = issubclass(dtype, np.floating) # faster than `numpy.issubdtype`
+ if not is_float and _track.TRACK_GRAPH:
+ # No need to constrain dtypes if we aren't tracking the graph.
+ # Also, it is nice to enable complex arithmetic through mygrad
+ # functions that are wrapped in no_autodiff
+ if not issubclass(dtype, CONSTANT_ONLY_DTYPES):
+ raise TypeError(
+                    f"Tensor data must be of a floating type, integer type, or boolean type, "
+ f"received {dtype}"
+ )
+
+ elif constant is False:
+ raise ValueError("Integer-valued tensors must be treated as constants.")
+
+ if constant is None:
+ # non-float: default constant -> True
+ # float: default constant -> False
+ constant = not is_float
+
+ self._constant = constant
+
+ self._grad = None # type: Union[None, np.ndarray]
+
+ # track all operations that this tensor participates in
+ self._ops: Set[WeakRef[Operation]] = set()
+
+ # base points to the initial tensor that owns the memory of this
+ # tensor
+ self._base = _base # type: Optional[Tensor]
+ # stores all of the tensors that are a view of this tensor
+ self._view_children = WeakRefIterable() # type: WeakRefIterable[Tensor]
+
+ # Used to reflect the view of the gradient associated with that of `self.base`.
+ # This is a means of distinguishing between the gradient set on `self` as
+ # part of backpropagation and the view of the gradient of its base.
+ self._view_grad: Optional[np.ndarray] = None
+
+ @property
+ def grad(self) -> Optional[np.ndarray]:
+ """
+ Returns the derivative of ``ℒ`` with respect to this tensor.
+
+        ``ℒ`` is the terminal node in the computational graph from which
+ ``ℒ.backward()`` was invoked.
+
+ If this tensor is a view of another tensor then their gradients
+ will exhibit the same memory-sharing relationship as their data.
+
+ Returns
+ -------
+ dℒ/dx: numpy.ndarray
+ The gradient of the terminal node in a computational graph
+ with respect to this tensor. The shape of this numpy array
+ matches ``self.shape``
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([1.0, 2.0])
+
+ Prior to backpropagation tensors have ``None`` set for their gradients.
+
+ >>> x.grad is None
+ True
+
+ Now we trigger backpropagation...
+
+ >>> ℒ = x ** 2
+ >>> ℒ.backward()
+
+ and we see that ``x.grad`` stores dℒ/dx
+
+ >>> x.grad # dℒ/dx
+ array([2., 4.])
+
+        Now we will demonstrate the relationship between the gradient of a view tensor
+        and that of its base.
+
+ >>> base = mg.Tensor([1.0, 2.0, 3.0])
+ >>> view = base[:2]; view
+ Tensor([1., 2.])
+
+ >>> ℒ = base ** 2
+ >>> ℒ.backward()
+
+        Although ``view`` is not directly involved in the computation of ``ℒ``,
+        and thus would not typically store a gradient due to ``ℒ.backward()``,
+        it shares memory with ``base`` and thus it stores a gradient in correspondence
+        to this "view relationship". I.e. because ``view == base[:2]``, then we expect
+        to find that ``view.grad == base.grad[:2]``.
+
+ >>> base.grad
+ array([2., 4., 6.])
+ >>> view.grad
+ array([2., 4.])
+
+ >>> view.grad.base is base.grad
+ True
+
+        The reasoning here is that, because a base tensor and its view share the same
+        array data, varying an element in that data implies that both the base
+        tensor and the view will change (assuming the variation occurs specifically in
+        a shared region). It follows that the base tensor's gradient must share that same
+        view relationship with the view-tensor's gradient, since gradients are measures
+        of the "cause and effect" associated with varying elements of data (albeit
+        infinitesimally).
+ """
+ if self._base is None:
+ return self._grad
+
+ if self._view_grad is not None and self._view_grad.base is self._base._grad:
+ # view grad has been computed already
+ return self._view_grad
+
+ if self._base._grad is None or self._creator is None:
+ # ``self`` had its graph, connecting it to its base, cleared.
+ # ``self._view_grad`` can't be computed without this info.
+ return None
+
+ (view_parent,) = self._creator.variables
+
+ # recursively fetches grad from parent
+ grad = view_parent.grad
+ with _track.no_autodiff:
+ self._view_grad = self._replay_op(grad).data if grad is not None else None
+ return self._view_grad
+
+    def astype(
+        self,
+        dtype: DTypeLikeReals,
+        casting="unsafe",
+        copy: bool = True,
+        *,
+        constant: Optional[bool] = None,
+    ) -> "Tensor":
+        """Copy of the tensor with the specified dtype.
+
+        The resulting tensor is not involved in any computational graph
+        and has no gradient associated with it.
+
+        This docstring was adapted from that of ``ndarray.astype``.
+
+        Parameters
+        ----------
+        dtype : Union[type, str]
+            The real-valued numeric data type. This can be a numpy dtype or
+            a corresponding string identifier.
+
+        casting : Literal['no', 'equiv', 'safe', 'same_kind', 'unsafe']
+            Controls what kind of data casting may occur. Defaults to 'unsafe'
+            for backwards compatibility.
+
+            - 'no' means the data types should not be cast at all.
+            - 'equiv' means only byte-order changes are allowed.
+            - 'safe' means only casts which can preserve values are allowed.
+            - 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed.
+            - 'unsafe' means any data conversions may be done.
+
+        copy : bool, optional (default=True)
+            By default, astype always returns a newly allocated array. If this is set to false, and
+            the ``dtype`` and ``constant`` requirements are satisfied, the input tensor is returned
+            instead of a copy.
+
+        constant : Optional[bool]
+            If specified, determines if the returned tensor is a constant.
+            Otherwise this argument is inferred from the original tensor.
+
+        Returns
+        -------
+        Tensor
+            The resulting tensor with the specified data type.
+
+        References
+        ----------
+        .. [1] https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html
+
+        Examples
+        --------
+        >>> import mygrad as mg
+        >>> import numpy as np
+        >>> x = mg.arange(10); x
+        Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+        Using a string to specify the data type:
+
+        >>> x.astype("float32")
+        Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)
+
+        Specifying a numpy data type object, and specifying that the
+        tensor is to be treated as a constant:
+
+        >>> x.astype(np.int8, constant=True)
+        Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)
+        """
+        cast_data = self.data.astype(dtype=dtype, casting=casting, copy=copy)
+
+        if cast_data is self.data and (constant is None or self.constant is constant):
+            return self
+
+        return type(self)(cast_data, copy=False, constant=constant)
+
+ @classmethod
+ def _op(
+ cls,
+ Op: Type[Operation],
+ *input_vars: ArrayLike,
+ op_args: Optional[Sequence] = None,
+ op_kwargs: Optional[Dict[str, Any]] = None,
+ constant: Optional[bool] = None,
+ out: Optional[Union[np.ndarray, "Tensor"]] = None,
+ ):
+ """Wraps operations performed between tensors: f(a, b, ...).
+
+ For developer use only.
+
+ Parameters
+ ----------
+ Op : Type[Operation]
+ Operation-class, used to perform forward-pass on `input_vars`.
+
+ input_vars : Tuple[array_like, ...]
+ An arbitrary number of input-tensors. These can take any form that
+ can be converted to an array. This includes numbers, sequences, nested
+ numerical sequences, numpy-ndarrays, and mygrad-tensors.
+
+ op_args : Optional[Tuple[Any, ...]]
+ Arbitrary positional arguments passed to the operation's forward pass.
+
+ op_kwargs : Optional[Dict[str, Any]]
+ Arbitrary keyword arguments passed to the operation's forward pass.
+
+        constant : Optional[bool]
+            If True, the resulting Tensor is a constant.
+
+        out: Optional[Union[np.ndarray, "Tensor"]]
+            The target where the output (an ndarray) of the operation will be written;
+            this raises if ``out`` is a read-only array.
+
+ There is an exception to this if a tensor is provided, in which case the
+ operation does not write to its underlying memory but rather triggers
+ "in-place semantics" so that the computational graph behaves as if the
+ tensor was mutated. See ``Tensor._in_place_op`` for more details.
+
+ Returns
+ -------
+ mygrad.Tensor
+ The tensor-result of the operation's forward-pass."""
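+        # Illustrative call, mirroring how ``__add__`` uses this method:
+        #     Tensor._op(Add, x, y)  # forward pass of x + y, recorded in the graph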
+ if out is not None:
+ if isinstance(out, tuple):
+ if len(out) > 1: # pragma: no cover
+ raise ValueError(
+                        "mygrad does not support in-place operations with more than one target"
+ )
+ (out,) = out
+
+ if isinstance(out, Tensor):
+ out._in_place_op(
+ Op,
+ *input_vars,
+ op_args=op_args,
+ op_kwargs=op_kwargs,
+ constant=constant,
+ )
+ return out
+
+ _uniques_bases_then_arrs = ()
+
+        # cast all input-vars to tensors
+        tensor_vars = tuple(
+            cls(var, constant=True, copy=False) if not isinstance(var, Tensor) else var
+            for var in input_vars
+        )
+
+        if _track.TRACK_GRAPH and _mem.MEM_GUARD:
+ # lock memory of array data
+ _uniques_bases_then_arrs = WeakRefIterable(
+ _mem.lock_arr_writeability(x)
+ for x in _mem.unique_arrs_and_bases(tensor_vars)
+ )
+
+ if op_args is None:
+ op_args = tuple()
+
+ if op_kwargs is None:
+ op_kwargs = {}
+
+ f = Op()
+
+ try:
+ if out is None:
+ op_out: np.ndarray = f(*tensor_vars, *op_args, **op_kwargs)
+ else:
+ op_out: np.ndarray = f(*tensor_vars, *op_args, **op_kwargs, out=out)
+ except Exception as e:
+ if _track.TRACK_GRAPH and _mem.MEM_GUARD:
+ _mem.release_writeability_lock_on_op(_uniques_bases_then_arrs)
+ raise e
+
+ if not _track.TRACK_GRAPH:
+ # execute operation without tracking creator or any graph
+ # information
+ return cls(
+ op_out,
+ constant=constant, # constant not determined by graph info
+ copy=False,
+ _creator=None,
+ _base=None,
+ )
+
+ # points to parent tensor that op-output is a view of
+ base = None # type: Optional[Tensor]
+
+ # If output of op is a view - tracks the tensor var that is
+ # the parent of the view
+ parent_var: Optional[Tensor] = None
+
+ # Determine whether or not op was a view; if so, `base`
+ # points to parent Tensor
+ op_out_base = op_out.base
+ if f.can_return_view and op_out_base is not None:
+ vars_can_share_mem = (
+ isinstance(var, (np.ndarray, Tensor)) for var in input_vars
+ )
+ for can_share_mem, parent_var in zip(vars_can_share_mem, tensor_vars):
+ if not can_share_mem:
+ continue
+ parent_data = parent_var.data
+ parent_data_base = parent_data.base
+
+ if (
+ (op_out_base is parent_data)
+ or (op_out_base is parent_data_base)
+ or (op_out is parent_data)
+ ):
+ if parent_var._base is not None and parent_var._creator is None:
+ parent_var._base = None
+
+ base = parent_var if parent_var.base is None else parent_var.base
+ break
+ else:
+ parent_var = None
+
+ for v in input_vars:
+ if isinstance(v, Tensor):
+ # tensor's graph has been cleared, but its base lingers
+ if v._base is not None and v._creator is None:
+ v._base = None
+
+ if base is None:
+ # non-view ops clear grads
+ v._grad = None
+ v._view_grad = None
+
+ if base is not None:
+ # we need to be able to replay view-ops for doing in-place operations
+ # on graphs with views
+ f.replay_args = op_args
+ f.replay_kwargs = op_kwargs
+ f.replay_force_constant = constant
+
+ # record graph information
+ if constant is None:
+ if any(not var.constant for var in tensor_vars):
+ constant = None
+ else:
+ constant = True
+
+ # record that a variable participated in that op
+ ref_f = ReferenceType(f) # type: WeakRef[Operation]
+ for var in tensor_vars:
+ var._ops.add(ref_f)
+
+ tensor_out = cls(
+ op_out,
+ constant=constant,
+ copy=False,
+ _creator=f,
+ _base=base,
+ )
+
+ if parent_var is not None:
+ parent_var._view_children.append(tensor_out)
+
+ if _mem.MEM_GUARD:
+ if out is not None and tensor_out.data.base is not None:
+ _mem.lock_arr_writeability(tensor_out.data.base)
+ _uniques_bases_then_arrs.append(tensor_out.data.base)
+ _mem.lock_arr_writeability(tensor_out.data)
+ tensor_refs = _uniques_bases_then_arrs
+ tensor_refs.append(tensor_out.data)
+ finalize(f, _mem.release_writeability_lock_on_op, tensor_refs)
+ return tensor_out
+
+ def _replay_op(self, *input_vars: ArrayLike) -> "Tensor":
+ """*dev use only*
+
+ Replays the op that produced `self` - called on the specified
+ input vars"""
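+        # Illustrative: if ``self`` was created via ``parent[:2]``, then
+        # ``self._replay_op(other)`` applies that same slice op to ``other``.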
+ if self.creator is None:
+ raise DisconnectedView(
+ "``Tensor._replay_op(...)`` was called on a tensor without a creator."
+ "\nPlease report this error at: https://github.com/rsokl/MyGrad/issues"
+ )
+ return self._op(
+ type(self.creator),
+ *input_vars,
+ op_args=self.creator.replay_args,
+ op_kwargs=self.creator.replay_kwargs,
+ constant=self.creator.replay_force_constant,
+ )
+
+    def backward(self, grad: Optional[ArrayLike] = None):
+        """Trigger backpropagation and compute the derivatives of this tensor.
+
+        Designating this tensor as the tensor ℒ, compute dℒ/dx for all (non-constant) tensors
+        that preceded ℒ in its computational graph, and store each of these derivatives in
+        ``x.grad`` respectively.
+
+        Once back-propagation is finished, the present tensor is removed from all computational
+        graphs, and the preceding graph is cleared.
+
+        If ℒ is a non-scalar tensor (i.e. ``ℒ.ndim`` is greater than 0), then calling
+        ``ℒ.backward()`` will behave as if ℒ was first reduced to a scalar via summation. I.e. it
+        will behave identically to ``ℒ.sum().backward()``; this ensures that each element of any
+        dℒ/dx will represent a derivative of a scalar function.
+
+        Parameters
+        ----------
+        grad : Optional[array_like] (must be broadcast-compatible with ``self``)
+            By default, the present tensor is treated as the terminus of the computational graph (ℒ).
+            Otherwise, one can specify a "downstream" derivative, representing ``dℒ/d(self)``.
+            This can be used to effectively connect otherwise separate computational graphs.
+
+        Examples
+        --------
+        >>> import mygrad as mg
+        >>> x = mg.tensor(2.0)
+        >>> y = mg.tensor(3.0)
+        >>> w = x * y
+        >>> ℒ = 2 * w
+        >>> ℒ.backward()  # computes dℒ/dℒ, dℒ/dw, dℒ/dy, and dℒ/dx
+
+        >>> ℒ.grad  # dℒ/dℒ == 1 by identity
+        array(1.)
+        >>> w.grad  # dℒ/dw
+        array(2.)
+        >>> y.grad  # dℒ/dy = dℒ/dw * dw/dy
+        array(4.)
+        >>> x.grad  # dℒ/dx = dℒ/dw * dw/dx
+        array(6.)
+
+        Calling ``ℒ.backward()`` from a non-scalar tensor is equivalent
+        to first summing that tensor.
+
+        >>> tensor = mg.tensor([2.0, 4.0, 8.0])
+        >>> ℒ = tensor * tensor[::-1]  # [x0*x2, x1*x1, x2*x0]
+        >>> ℒ.backward()  # behaves like ℒ = x0*x2 + x1*x1 + x2*x0
+        >>> tensor.grad
+        array([16.,  8.,  4.])
+
+        >>> tensor = mg.Tensor([2.0, 4.0, 8.0])
+        >>> ℒ = tensor * tensor[::-1]
+        >>> ℒ.sum().backward()
+        >>> tensor.grad
+        array([16.,  8.,  4.])
+
+        Specifying a value for ``grad``:
+
+        >>> x = mg.Tensor(1.)
+        >>> x.backward(2.)
+        >>> x.grad  # would normally be dℒ/dℒ == 1
+        array(2.)
+        """
+        if not _track.TRACK_GRAPH:
+            return
+
+        if self.constant:
+            self.clear_graph()
+            return
+
+        topo_sorted_tensors: Deque["Tensor"] = deque([])
+        seen: Set[int] = set()
+
+        collect_all_tensors_and_clear_grads(self, seen, topo_sorted_tensors)
+
+        # don't set self._grad yet because there is a grad-clearing step that
+        # occurs during graph creation
+        if grad is not None:
+            # `self` is guaranteed to be a tensor of floats
+            # so we can simply cast `grad` to be the same dtype
+            _grad = asarray(grad, dtype=self.dtype)
+
+            if _grad.shape != self.shape:
+                try:
+                    # See if grad can broadcast to `self`;
+                    # raises ValueError if not
+                    _grad = np.multiply(
+                        np.full_like(self.data, fill_value=1.0),
+                        _grad,
+                        dtype=self.dtype,
+                    )
+                    if _grad.shape != self.shape:
+                        # mutual broadcasting occurred
+                        raise ValueError()
+                except ValueError:
+                    raise ValueError(
+                        f"`tensor.backward(grad)` was passed a gradient with an incompatible shape.\n"
+                        f"`grad` must be broadcast-compatible with `tensor.shape={self.shape}`\n"
+                        f"Got `grad.shape={_grad.shape}`"
+                    )
+        else:
+            _grad = np.full_like(self.data, fill_value=1.0)
+
+        self._grad = _grad
+
+        if self.creator is not None:
+            for t in topo_sorted_tensors:
+                t._backward()
+
+        self.clear_graph()
+
+ def _backward(self):
+ """
+ **For dev-use only**
+
+ If `self` has accumulated incoming gradients from all operations in the terminal node's
+ computational graph, back-propagate the accumulated gradient to the creator of `self`.
+
+ Raises
+ ------
+ AssertionError
+ Raises if the tensor and its associated gradient possess different shapes.
+ Raises if `_backward` triggered on a tensor with gradient of `None`.
+ """
+ assert self._grad is not None, (
+ f"backprop, post grad-accumulation, was triggered "
+ f"on a tensor with no gradient"
+ f"\n{self}"
+ f"\nid {id(self._ops)}"
+ f"\ngrad: {self.grad}"
+ f"\ncreator: {self.creator}"
+ f"\nops: {self._ops}"
+ f"\nbase: {self.base}"
+ )
+ assert self._grad.shape == self.shape, (
+ f"A tensor and its associated gradient must possess the same shape. Got:"
+ f"\ntensor-shape: {self.shape}"
+ f"\ngrad-shape: {self._grad.shape}"
+ )
+ if self._creator is not None:
+ self._creator.backward(self._grad)
+ return
+
+    def null_grad(self, *, _clear_view_info: bool = False) -> "Tensor":
+        """Sets this tensor's gradient to be ``None``.
+
+        This operation is performed in-place, but a reference to the
+        tensor is returned in order to permit mapping semantics.
+
+        Also removes any ``base`` reference from disconnected views.
+
+        Returns
+        -------
+        self
+
+        Examples
+        --------
+        >>> import mygrad as mg
+        >>> x = mg.Tensor(2.)
+        >>> (x ** 2).backward()
+        >>> x.grad
+        array(4.)
+        >>> x.null_grad()  # returns a reference of `x`
+        Tensor(2.0)
+        >>> x.grad is None
+        True
+        """
+        self._view_grad = None
+        self._grad = None
+
+        if _clear_view_info:
+            if self._base is not None and self._creator is None:
+                self._base = None
+
+        return self
+
+    def null_gradients(self, clear_graph: bool = True):
+        """
+        **Deprecated: tensors will automatically have their computational graphs cleared
+        during backprop. Simply involving a tensor in a new computational graph will null
+        its gradient.**
+
+        Sets the gradient for this tensor and for all preceding tensors in the computational
+        graph to ``None``.
+
+        Additionally, the computational graph that terminates in this tensor can also be
+        cleared during this process.
+
+        Parameters
+        ----------
+        clear_graph : bool, optional (default=True)
+            If ``True`` clear the computational graph in addition to nulling the gradients.
+
+        Notes
+        -----
+        It is advised to clear the computational graph when nulling gradients, i.e. invoke
+        ``null_gradients(clear_graph=True)`` (or simply ``null_gradients()``). This de-references
+        all intermediate operations and tensors in the computational graph and thus permits
+        garbage collection - freeing the memory that was used by the computational graph.
+
+        Examples
+        --------
+        >>> import mygrad as mg
+        >>> x = mg.tensor(2.0)
+        >>> y = mg.tensor(3.0)
+        >>> w = x * y
+        >>> f = 2 * w
+        >>> f.backward()  # computes df/df, df/dw, df/dy, and df/dx
+        >>> any(tensor.grad is None for tensor in (f, w, x, y))
+        False
+
+        >>> f.null_gradients()  # set tensor.grad to None for all tensors in the graph
+        >>> all(tensor.grad is None for tensor in (f, w, x, y))
+        True
+        """
+        import warnings
+
+        warnings.warn(
+            "`tensor.null_gradients()` is deprecated. Calling it will raise an error "
+            "in future versions of MyGrad. A tensor will automatically "
+            "have its gradient nulled if you use it in a new computational graph. "
+            "Or, you can call `tensor.null_grad()` to null that individual tensor's "
+            "gradient.",
+            FutureWarning,
+        )
+
+    def clear_graph(self):
+        """
+        Removes the current tensor - and tensors above it - from their shared
+        computational graph.
+
+        This de-references all operations involved in the graph and the intermediate
+        tensors that were created by it. Arrays whose memory was locked by the
+        computational graph will have their writeability restored.
+
+        Examples
+        --------
+        >>> import mygrad as mg
+        >>> import numpy as np
+        >>> x = np.array([1., 2.])
+        >>> y = mg.multiply(2., x)
+        >>> x.flags.writeable, y.creator
+        (False, <mygrad.math.arithmetic.ops.Multiply at 0x224f89cac48>)
+        >>> y.clear_graph()
+        >>> x.flags.writeable, y.creator
+        (True, None)
+        """
+        if self._base is not None:
+            # "pull" on grad to force views to update their
+            # gradients from upstream before the graph info
+            # gets cleared
+            _ = self.grad
+
+        self._view_children.clear()
+        self._ops.clear()
+
+        if self._creator is None:
+            return
+
+        creator = self._creator
+        self._creator = None  # marks tensor as "visited" during graph-traversal
+
+        for var in creator.variables:  # type: "Tensor"
+            var.clear_graph()
+
+ @property
+ def constant(self) -> bool:
+ """If ``True``, this tensor is a constant; it will not propagate any gradient.
+
+ Additionally, any tensor that is a descendant of constant tensors will also
+ be a constant.
+
+        Integer-valued tensors, Python scalars, and NumPy arrays are treated as constant
+ tensors when included in MyGrad computational graphs.
+
+ Returns
+ -------
+ bool
+
+ Examples
+ --------
+ Constant-tensors do not back-propagate gradients:
+
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([1., 2.], constant=True)
+ >>> y = mg.Tensor([0., 3.], constant=False)
+ >>> f = x * y
+ >>> f.backward()
+
+ >>> x.grad is None # x has no gradient
+ True
+ >>> y.grad
+ array([1., 2.])
+
+ A tensor that is derived solely from constant tensors is also
+ a constant:
+
+ >>> import numpy as np
+ >>> x = mg.Tensor([1., 2.], constant=True)
+ >>> y = mg.Tensor([0., 3.], constant=True)
+ >>> z = (x + y) ** 2 - np.array([8., 7.])
+ >>> z.constant
+ True
+
+ Integer-valued tensors are treated as constants
+
+ >>> mg.Tensor([1, 2]).constant
+ True
+ """
+ return self._constant
+
+ @property
+ def creator(self) -> Optional[Operation]:
+ """The ``Operation`` instance that produced ``self``.
+
+ Returns
+ -------
+ creator : Optional[Operation]
+ The operation-instance that created the tensor, or `None`.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor(3)
+ >>> x.creator is None
+ True
+ >>> y = mg.Tensor(2)
+ >>> z = x * y # Multiply(x, y) -> z
+ >>> z.creator
+ <mygrad.math.arithmetic.ops.Multiply at 0x2df5a130438>
+ """
+ return self._creator
+
+ def __len__(self) -> int:
+ return len(self.data)
+
+ def __contains__(self, item) -> bool:
+ return self.data.__contains__(item)
+
+ def __getitem__(self, item: Index) -> "Tensor":
+ return self._op(GetItem, self, op_args=(item,))
+
+ def __iter__(self) -> Iterator["Tensor"]:
+        # In the same way that numpy doesn't let you iterate over 0-dimensional
+        # arrays, don't allow iteration over 0-dimensional tensors.
+ if self.ndim == 0:
+ raise TypeError("iteration over a 0-d tensor")
+ return iter(self[n] for n in range(len(self)))
+
+ def _in_place_op(
+ self,
+ inplace_op: Type[Operation],
+ *input_vars: ArrayLike,
+ op_args: Optional[Sequence] = None,
+ op_kwargs: Optional[Dict] = None,
+ constant: Optional[bool] = None,
+ ):
+ if _track.TRACK_GRAPH is False:
+ return self._op(
+ inplace_op,
+ *input_vars,
+ op_args=op_args,
+ op_kwargs=op_kwargs,
+ constant=constant,
+ out=self.data,
+ )
+ #
+ # **********************************************************************************
+ # The way that in-place updates work in MyGrad is that any tensor that
+ # is about to undergo a mutation gets "cloned". Each resulting "placeholder"
+ # is used to represent that tensor in any non-view operations that the tensor
+ # was participating in. This ensures that the stateful computational graph
+ # is not corrupted by this mutation.
+ #
+ # Once the placeholders have been created, they have permanently replaced the
+        # roles of their counterparts within the computational graph. Furthermore, they
+ # exist only internally to the computational graph and thus cannot be the
+ # targets of subsequent views or in-place updates.
+ #
+ # At this point, the "original" tensors merely reserve the publicly-available
+ # Tensor-instances (husks) that the users will access. We eventually need to
+ # populate these husks with the appropriate augmented contents and graph-history.
+ #
+ # Thus this method will compute the in-place operation on a new tensor, and
+ # will create a new, internal computational graph involving the base tensor
+ # affected by the mutation and any of its view-children. These tensors represent
+ # the mutated tensors that the users expect to have access to.
+ #
+ # We must connect this new computational graph to the preceding one – the one
+ # involving the placeholders; this way we can backpropagate appropriately and
+ # through all influencers.
+ #
+ # Finally we mirror each of these new tensors into the husks of the publicly
+ # -available tensors and reroute the computational graph through them so that
+ # the user sees that all of the relevant tensors have been augmented, and that
+ # they are connected to the appropriate "history" such that backprop occurs
+ # without error or inaccuracy.
+ #
+ #
+ # For illustration, consider the following graph:
+ #
+ # ... x------[square]-- y = x**2
+ # \
+ # ---[slice]-- z = view-x
+ # \
+ # ---[mul]-- w = 3 * z
+ #
+ # Now suppose that we mutate `x` with `x[:] = 0`. This is a simpler case than
+ # mutating a view of `x`, since `x` is already the base tensor.
+ # - This should not affect `y`
+        # - It should affect `z` (the view of `x`)
+        # - It should *not* affect `w`, which depends on `z` in a "static" way.
+        #   I.e. the value for `w` is already resolved and is not a view of z or x.
+ #
+ #
+ # As prescribed above, we will make the placeholders: px and pz, and we
+ # will reroute the operations that statically depend on the old values of x and z
+ # through these placeholders.
+ #
+ # Next we will have `x` point to a mutated version of itself, in accord with the
+ # in-place update being performed, and we will subsequently recreate any
+ # views of x (i.e. z), based off of this mutated tensor.
+ #
+ # The resulting graph is:
+ #
+ # ---[slice]-- z = view-x
+ # /
+ # -----[set-item] -- x = px.copy()[:]=0
+ # /
+ # ... px------[square]-- y = px**2
+ # \
+ # ---[slice]-- pz = view-px
+ # \
+ # ---[mul]-- w = 3 * pz
+ #
+ # Note that px and pz are strictly *internal* tensors; they cannot be accessed for
+ # use in any further operations, whereas `x` and `z` are available for further use.
+ #
+ # **********************************************************************************
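+        #
+        # A concrete (illustrative) REPL rendition of the example above:
+        #
+        #     >>> x = mg.tensor([1., 2., 3.])
+        #     >>> y = x ** 2        # "static" dependency on x
+        #     >>> z = x[:2]         # view of x
+        #     >>> w = 3 * z         # "static" dependency on z
+        #     >>> x[:] = 0          # mutates x and z; y and w route through px and pz
+        #     >>> z
+        #     Tensor([0., 0.])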
+ #
+ # Replace base and all of its views with "placeholder" tensors;
+ # they serve as internal references to all tensors pre-mutation
+ # and will preserve ops relying on the un-mutated tensors.
+ #
+ # These placeholder tensors are never publicly-available and thus cannot
+ # be involved directly in future in-place updates
+
+ # In Tensor._op, any tensor entering an op has its grad/view-info cleared
+ # We must do this here up front since we need to consume information
+ # about ``self``
+ self.null_grad(_clear_view_info=True)
+ if self._base is not None and not self._base._view_children:
+ self._base = None
+
+ graph = _dup.DuplicatingGraph(self if self.base is None else self.base)
+
+ # Create copy of base so that mutation has no impact on the
+ # state of any ops depending on it or its views
+ mutant_base = graph.base.tensor.copy()
+ mutant_base.data.flags.writeable = (
+ graph.base.tensor.data.flags.writeable
+ or _mem.array_is_tracked(graph.base.tensor.data)
+ )
+
+ # Create view of base in correspondence to relationship
+ # that `self` has to base. Mutating this view will mutate
+ # base appropriately
+ inplace_target = mutant_base
+
+ # stores view-fn sequence from base -> in-place target
+ view_fn_sequence: List[Callable[[np.ndarray], np.ndarray]] = []
+
+ with _track.no_autodiff:
+ # get view sequence from base -> in-place target
+ for node in graph.get_path_to_base(self)[::-1][1:]: # skip base
+ # need to point to place-holder replay op to avoid creating
+ # forwards references to downstream tensors
+ f = node.placeholder._replay_op
+ if self.base is not None:
+ # need sequence of view-ops
+ view_fn_sequence.append(_track.no_autodiff(f, to_numpy=True))
+ inplace_target = f(inplace_target)
+
+ # Constant info was not propagated through no-autodiff mode.
+ # It must be inferred from the original tensor
+ inplace_target._constant = mutant_base.constant
+
+ mutant_base_data = mutant_base.data
+ del mutant_base
+
+ try:
+ with _mem.mem_guard_off:
+ placeholder_mutant_view = (
+ self._op( # will raise if original data not writeable
+ inplace_op,
+ *(graph.get_placeholder_if_exists(t) for t in input_vars),
+ op_args=op_args,
+ op_kwargs=op_kwargs,
+ constant=constant,
+ out=inplace_target.data,
+ )
+ )
+ except Exception as e:
+ graph.restore_old_graph()
+ raise e
+
+ placeholder_mutant_view._constant = inplace_target._constant
+
+ if _mem.MEM_GUARD:
+ _mem.force_lock_tensor_and_creators(placeholder_mutant_view)
+
+ if placeholder_mutant_view.creator.where is not True:
+ # An operation like `multiply(x, y, where=mask, out=z)` occurred.
+ # `placeholder_mutant_view` is the mutated version of `z`.
+ # We need to connect the upstream version of `z` to the computational
+            # graph so that `~mask * dℒ/dz` backprops to it, whereas `mask * dℒ/dz`
+ # will backprop to `x` and `y`.
+ #
+ # This is basically an alternative to treating
+ # `multiply(x, y, where=mask, out=z)`
+ # like a three-input operation, which adds complexity to the implementation
+ # of every op that supports `where` and `out`.
+ #
+ # old-z ---------------------
+ # | |
+ # multiply(x, y, where=mask, out=z) |
+ # | |
+ # z --------------------
+ # | |
+ # ApplyMask
+ # |
+ # z
+ with _mem.mem_guard_off:
+ placeholder_mutant_view = type(self)._op(
+ _dup.ApplyMask,
+ placeholder_mutant_view, # gets passed through unchanged
+ # ~mask * grad backprops to upstream placeholder
+ graph[self].placeholder,
+ op_kwargs={
+ "mask": placeholder_mutant_view.creator.where,
+ },
+ )
+
+ # Connect public base tensor to placeholder graph via the mutated placeholder
+ # tensor `out`.
+ if self.base is None:
+ # The current graph:
+ # base-p --> | inplace | --> vp'
+ # Becomes:
+ # base-p --> | inplace | --> base'
+ #
+ # The base tensor itself was the target of the in-place operation,
+ # thus we need simply mirror original base against the mutant placeholder.
+ # This effectively connects the original base to the placeholder graph
+ mutant_base = placeholder_mutant_view
+
+ else:
+ # in-place operation occurred on a view; must connect mutated base
+ # to graph and then reproduce downstream views
+ #
+ # The current graph:
+ # vp --> | inplace | --> vp'
+ #
+ # Becomes:
+ #
+ # vp --> | inplace | --> vp' --> | |
+ # | unview | --> base'
+ # base-p -----------------------> | |
+ #
+ # I.e. the mutated base is a combination of the placeholder
+ # base and of the mutant view.
+
+ mutant_base = type(self)._op(
+ _dup.UnView,
+ graph.base.placeholder,
+ placeholder_mutant_view,
+ op_kwargs={
+ # Copy to avoid upstream placeholder mutant view sharing memory
+ # with downstream mutant base
+ "mutant_base_data": mutant_base_data,
+ "view_fn_sequence": view_fn_sequence,
+ },
+ )
+
+ del placeholder_mutant_view
+
+ # The original base now points to the augmented array data
+ # and has the InPlaceOp as its creator
+ _dup.mirror_tensor(source=mutant_base, target=graph.base.tensor)
+
+ del mutant_base
+
+ # Now that the base-tensor has been incorporated into the graph,
+ # recreate the view-graph and reroute all tensors from previous
+ # graph to their downstream counterparts
+ #
+ # Note that iterating in a topologically-ordered way is critical
+ # here: each parent is updated before creating one of its children
+ #
+ # Iteration is always based off of the placeholders' relative positions
+ # in the graph since this will never be mutated.
+ for node in graph:
+ if node.parent is None:
+ continue
+ view = node.tensor._replay_op(node.parent)
+ _dup.mirror_tensor(source=view, target=node.tensor)
+ node.parent._view_children.append(node.tensor)
+
+ @property
+ def shape(self) -> Shape:
+ """Tuple of tensor dimension-sizes.
+
+ Sizes are reported in row-major order.
+
+ Returns
+ -------
+ Tuple[int, ...]
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([1, 2, 3, 4]) # axis-0 has size 4
+ >>> x.shape
+ (4,)
+ >>> y = mg.Tensor([[1, 2, 3], # axis-0 has size 2, axis-1 has size 3
+ ... [4, 5, 6]])
+ >>> y.shape
+ (2, 3)
+
+ The shape attribute can also be set to reshape the tensor in-place
+
+ >>> y.shape = (1, 6, 1)
+ >>> y
+ Tensor([[[1],
+ [2],
+ [3],
+ [4],
+ [5],
+ [6]]])
+
+ See Also
+ --------
+ mygrad.reshape : similar function
+ Tensor.reshape : similar method"""
+ return self.data.shape
+
+ @shape.setter
+ def shape(self, newshape: Union[int, Shape]):
+ # Even though this op cannot mutate views, we still must
+ # do graph-replaying here so that views can still reference
+ # this tensor, but with the proper reshaping mediating them.
+ #
+ # E.g.
+ # x = arange(10) # shape-(10,)
+ # y = x[:6] # shape-(6,)
+ # x.shape = (2, 5) # shape-(2, 5)
+ #
+ # y.base points to the shape-(2,5) array
+ # even though y is a view of the flat array
+ #
+ # thus we need to play this graph as
+ # (history)
+ # |
+ # placeholder shape-(10,)
+ # |-reshape
+ # x shape-(2,5)
+ # |-reshape
+ # placeholder shape-(10,)
+ # |-getitem
+ #     y shape-(6,)
+
+ if not _track.TRACK_GRAPH:
+ self.data.shape = newshape
+ return
+
+ if newshape == self.shape:
+ return
+
+ old_shape = self.shape
+
+ # setting `ndarray.shape` raises if the new shape is incompatible;
+ # round-trip it here so invalid shapes fail before we replay the graph
+ self.data.shape = newshape
+ self.data.shape = old_shape
+
+ # create placeholders for self and all of its view-children
+ graph = _dup.DuplicatingGraph(self)
+ # need to iterate over all nodes now before we tinker
+ # with the view children
+ nodes = tuple(graph)
+
+ # reshape placeholder of self
+ out = graph.base.placeholder.reshape(newshape)
+
+ # Store contents of `out` in `self` and replace `out` in
+ # graph with `self`
+ out._base = graph.base.placeholder.base
+ _dup.mirror_tensor(source=out, target=self)
+ _dup.reroute_ops_through(source=out, target=self)
+ del out
+
+ # although `self` is a view of placeholder, placeholder
+ # is strictly an internal tensor, we won't expose it as
+ # base
+ graph.base.placeholder._view_children.append(self)
+ base = graph.base.placeholder.base
+
+ if base is not None:
+ # if `self` was a view, we need to update that parent's
+ # view children so that it points to the placeholder
+ creator = graph.base.placeholder.creator.variables[0]
+ creator._view_children = WeakRefIterable(
+ [
+ w if w is not self else graph.base.placeholder
+ for w in graph.base.placeholder._view_children
+ ]
+ )
+
+ # Undo the reshape, and place this as the tensor joining
+ # the reshaped `self` with the views of unshaped `self`
+ unshaped = self.reshape(old_shape)
+
+ for node in nodes:
+ if node.parent is None:
+ continue
+ # direct what would be views of `self` to be views of `unshaped`,
+ # which translates the mutated shape of `self` to the original
+ # shape used to create the views
+ parent = node.parent if node.parent is not self else unshaped
+ view = node.tensor._replay_op(parent)
+ _dup.mirror_tensor(source=view, target=node.tensor)
+ _dup.reroute_ops_through(source=view, target=node.tensor)
+ parent._view_children.append(node.tensor)
+
+ def __setitem__(self, key: Index, value: ArrayLike):
+ self._in_place_op(SetItem, self, value, op_args=(key,))
+
+ def __add__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Add, self, other)
+
+ def __iadd__(self, other: ArrayLike) -> "Tensor":
+ self._in_place_op(Add, self, other)
+ return self
+
+ def __radd__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Add, other, self)
+
+ def __sub__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Subtract, self, other)
+
+ def __isub__(self, other: ArrayLike) -> "Tensor":
+ self._in_place_op(Subtract, self, other)
+ return self
+
+ def __rsub__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Subtract, other, self)
+
+ def __truediv__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Divide, self, other)
+
+ def __rtruediv__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Divide, other, self)
+
+ def __floordiv__(self, other: ArrayLike) -> np.ndarray:
+ return np.floor_divide(self, other)
+
+ def __rfloordiv__(self, other: ArrayLike) -> np.ndarray:
+ return np.floor_divide(other, self)
+
+ def __itruediv__(self, other: ArrayLike) -> "Tensor":
+ self._in_place_op(Divide, self, other)
+ return self
+
+ def __mul__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Multiply, self, other)
+
+ def __imul__(self, other: ArrayLike) -> "Tensor":
+ self._in_place_op(Multiply, self, other)
+ return self
+
+ def __rmul__(self, other: ArrayLike) -> "Tensor":
+ return self._op(Multiply, other, self)
+
+ def __matmul__(self, other: ArrayLike) -> "Tensor":
+ return self._op(MatMul, self, other)
+
+ def __rmatmul__(self, other: ArrayLike) -> "Tensor":
+ return self._op(MatMul, other, self)
+
+ def __pow__(self, other: ArrayLike):
+ if isinstance(other, Number) or (
+ isinstance(other, np.ndarray) and other.ndim == 0
+ ):
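+ # fast paths: dispatch x ** 1 and x ** 2 to the dedicated Positive
+ # and Square ops, presumably sidestepping the more expensive
+ # general-purpose Power gradient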
+ if other == 1:
+ return self._op(Positive, self)
+ elif other == 2:
+ return self._op(Square, self)
+
+ return self._op(Power, self, other)
+
+ def __ipow__(self, other: ArrayLike) -> "Tensor":
+ if isinstance(other, Number) or (
+ isinstance(other, np.ndarray) and other.ndim == 0
+ ):
+ if other == 1:
+ self._in_place_op(Positive, self)
+ return self
+ elif other == 2:
+ self._in_place_op(Square, self)
+ return self
+
+ self._in_place_op(Power, self, other)
+ return self
+
+ def __rpow__(self, other: ArrayLike):
+ return self._op(Power, other, self)
+
+ def __neg__(self):
+ return self._op(Negative, self)
+
+ def __pos__(self):
+ return self._op(Positive, self)
+
+ def __repr__(self) -> str:
+ return repr(self.data).replace("array", "Tensor").replace("\n", "\n ")
+
+ def __copy__(self) -> "Tensor":
+ """Produces a copy of ``self`` with ``copy.creator=None``.
+
+ Copies of the underlying numpy data array and gradient array are created.
+
+ Returns
+ -------
+ Tensor
+ """
+ return self.copy()
+
+ def copy(self, *, constant: Optional[bool] = None) -> "Tensor":
+ """Produces a copy of ``self`` with ``copy.creator=None``.
+
+ Copies of the underlying numpy data array and gradient array are created.
+
+ No information regarding the tensor's participation in the computational
+ graph is copied.
+
+ Parameters
+ ----------
+ constant : Optional[bool]
+
+ Returns
+ -------
+ Tensor
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.tensor(3.0)
+ >>> y = x * 2
+ >>> y.backward()
+ >>> y_copy = y.copy()
+ >>> y_copy
+ Tensor(6.)
+ >>> y_copy.grad
+ array(1.)
+ >>> y_copy.creator is None
+ True
+ """
+ copy = Tensor(
+ np.copy(self.data),
+ constant=(self.constant if constant is None else constant),
+ )
+ copy._grad = np.copy(self._grad) if self._grad is not None else None
+ return copy
+
+ def item(self) -> Union[int, float]:
+ """Copy an element of a tensor to a standard Python scalar and return it.
+
+ Note that the returned object does not support back-propagation.
+
+ Returns
+ -------
+ z : Standard Python scalar object
+ A copy of the specified element of the tensor as a suitable
+ Python scalar
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([22.2])
+ >>> x.item()
+ 22.2
+ >>> type(x.item())
+ <class 'float'>
+ """
+ if self.size > 1:
+ raise ValueError("can only convert a tensor of size 1 to a Python scalar")
+ return self.data.item()
+
+ def __float__(self) -> float:
+ if self.size > 1:
+ raise TypeError("can only convert a tensor of size 1 to a Python scalar")
+ return float(self.data)
+
+ def __int__(self) -> int:
+ if self.size > 1:
+ raise TypeError("can only convert a tensor of size 1 to a Python scalar")
+ return int(self.data)
+
+ def __index__(self) -> int:
+ """Return self converted to an integer, if self is suitable for use as an index
+ into a list."""
+ return self.data.__index__()
+
+ def flatten(self, *, constant: Optional[bool] = None) -> "Tensor":
+ """Return a copy of the tensor collapsed into one dimension.
+
+ This docstring was adapted from ``numpy.ndarray.flatten``.
+
+ Parameters
+ ----------
+ constant : bool, optional(default=False)
+ If ``True``, the returned tensor is a constant (it
+ does not back-propagate a gradient)
+
+ Returns
+ -------
+ mygrad.Tensor
+ A copy of the input tensor, flattened to one dimension.
+
+ Notes
+ -----
+ To return a flattened view of the tensor, use ``x.reshape(-1)``.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([[1, 2],
+ ... [3, 4]])
+ >>> x.flatten()
+ Tensor([1, 2, 3, 4])
+ """
+ return Tensor._op(Flatten, self, constant=constant)
+
+ @property
+ def base(self) -> Optional["Tensor"]:
+ """
+ A reference to the base tensor that the present tensor is a view of.
+
+ If this tensor owns its memory, then this returns ``None``.
+
+ Examples
+ --------
+ The base of a tensor that owns its memory is ``None``:
+
+ >>> import mygrad as mg
+ >>> x = mg.arange(5)
+ >>> x.base is None
+ True
+
+ Slicing creates a view, whose memory is shared with x:
+
+ >>> y = x[2:]
+ >>> y.base is x
+ True
+ >>> y.data.base is x.data
+ True
+
+ A view of a view has the same base as its "parent"
+
+ >>> z = y[:]
+ >>> z.base is x
+ True
+
+ The behavior of ``Tensor.base`` departs from that of ``ndarray.base`` in that
+ mygrad will never create an "internal" tensor to serve as a base; e.g.
+
+ >>> import numpy as np
+ >>> np.reshape(2., (1,)).base
+ array(2.)
+
+ >>> mg.reshape(2., (1,)).base is None
+ True
+ """
+ return self._base
+
+ @property
+ def size(self) -> int:
+ """
+ Number of elements in the tensor, i.e. the product of the tensor's
+ dimensions.
+
+ Returns
+ -------
+ int
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.zeros((3, 5, 2)) # creates a tensor with 3x5x2 (= 30) elements
+ >>> x.size
+ 30
+ """
+ return self.data.size
+
+ @property
+ def ndim(self) -> int:
+ """Number of tensor dimensions. I.e. the number
+ of indices that must be supplied to uniquely specify
+ an element in the tensor.
+
+ Returns
+ -------
+ int
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([1, 2, 3])
+ >>> x.ndim
+ 1
+ >>> x[0] # a single index identifies an element in `x`
+ Tensor(1)
+
+ >>> y = mg.Tensor([[1, 2, 3],
+ ... [4, 5, 6]])
+ >>> y.ndim
+ 2
+ >>> y[0, 0] # two indices are required to identify an element in `y`
+ Tensor(1)"""
+ return self.data.ndim
+
+ @property
+ def dtype(self) -> np.dtype:
+ """Data-type of the tensor's elements.
+
+ Returns
+ -------
+ numpy dtype object
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.Tensor([[0, 1],
+ ... [2, 3]])
+ >>> x.dtype
+ dtype('int32')
+ >>> type(x.dtype)
+ <class 'numpy.dtype[int32]'>"""
+ return self.data.dtype
+
+ def reshape(
+ self, *newshape: Union[int, Shape], constant: Optional[bool] = None
+ ) -> "Tensor":
+ """Returns a tensor with a new shape, without changing its data.
+ This docstring was adapted from ``numpy.reshape``
+
+ Parameters
+ ----------
+ *newshape : Union[int, Tuple[int, ...]]
+ The new shape should be compatible with the original shape. If
+ an integer, then the result will be a 1-D tensor of that length.
+ One shape dimension can be -1. In this case, the value is
+ inferred from the length of the tensor and remaining dimensions.
+
+ constant : bool, optional(default=False)
+ If ``True``, the returned tensor is a constant (it
+ does not back-propagate a gradient)
+
+ Returns
+ -------
+ mygrad.Tensor
+ ``a`` with its shape changed. A new tensor is returned.
+
+ Notes
+ -----
+ ``reshape`` utilizes C-ordering, meaning that it reads & writes elements using
+ C-like index ordering; the last axis index changing fastest, and, proceeding
+ in reverse order, the first axis index changing slowest.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> a = mg.Tensor([[1, 2, 3], [4, 5, 6]])
+ >>> a.reshape(6)
+ Tensor([1, 2, 3, 4, 5, 6])
+
+ >>> a.reshape(3, -1) # the unspecified value is inferred to be 2
+ Tensor([[1, 2],
+ [3, 4],
+ [5, 6]])
+ """
+
+ if not newshape:
+ raise TypeError("reshape() takes at least 1 argument (0 given)")
+ if hasattr(newshape[0], "__iter__"):
+ if len(newshape) > 1:
+ raise TypeError("an integer is required")
+ newshape = newshape[0]
+ return Tensor._op(Reshape, self, op_args=(newshape,), constant=constant)
+
+ @property
+ def T(self) -> "Tensor":
+ """Same as self.transpose(), except that self is returned if self.ndim < 2 and
+ a view of the underlying data is utilized whenever possible.
+
+ Returns
+ -------
+ Tensor
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> y = mg.Tensor([[1, 2, 3],
+ ... [4, 5, 6]])
+ >>> y.T
+ Tensor([[1, 4],
+ [2, 5],
+ [3, 6]])
+ """
+ return self._op(Tensor_Transpose_Property, self)
+
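+ # Comparison operators are non-differentiable; they bypass the
+ # computational graph and return plain numpy boolean arrays.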
+ def __eq__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__eq__(self.data, asarray(other))
+
+ def __ne__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__ne__(self.data, asarray(other))
+
+ def __lt__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__lt__(self.data, asarray(other))
+
+ def __le__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__le__(self.data, asarray(other))
+
+ def __gt__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__gt__(self.data, asarray(other))
+
+ def __ge__(self, other: ArrayLike) -> np.ndarray:
+ return np.ndarray.__ge__(self.data, asarray(other))
+
+ def __imatmul__(self, other): # pragma: no cover
+ raise TypeError(
+ "In-place matrix multiplication is not (yet) supported. "
+ "Use 'a = a @ b' instead of 'a @= b'"
+ )
+
+ def sum(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Sum of tensor elements over a given axis.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which a sum is performed. The default,
+ axis=None, will sum all of the elements of the input tensor. If
+ axis is negative it counts from the last to the first axis.
+ If axis is a tuple of ints, a sum is performed on all of the axes
+ specified in the tuple instead of a single axis or all the axes as
+ before.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the input tensor.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ sum_along_axis : mygrad.Tensor
+ A Tensor with the same shape as `self`, with the specified
+ axis/axes removed. If `self` is a 0-d tensor, or if `axis` is None,
+ a 0-dim Tensor is returned.
+
+ See Also
+ --------
+ mygrad.sum : Equivalent function.
+
+ cumsum : Cumulative sum of array elements.
+
+ mean, average
+
+ Notes
+ -----
+ Arithmetic is modular when using integer types, and no error is
+ raised on overflow.
+
+ The sum of an empty tensor is the neutral element 0:
+
+ >>> import mygrad as mg
+ >>> mg.sum([])
+ Tensor(0.)
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> x = mg.tensor([1., 1.])
+ >>> x.sum()
+ Tensor(2.)
+ >>> x = mg.tensor([[0, 1], [0, 5]])
+ >>> x.sum()
+ Tensor(6)
+ >>> x.sum(axis=0)
+ Tensor([0, 6])
+ >>> x.sum(axis=1)
+ Tensor([1, 5])
+ """
+ return Tensor._op(
+ Sum, self, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant
+ )
+
+ def prod(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Return the product of array elements over given axes.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which to operate. By default, flattened input is used.
+
+ keepdims : bool, optional (default=False)
+ If this is set to True, the axes which are reduced are left in the
+ result as dimensions with size one. With this option, the result
+ will broadcast correctly against the input array.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ product_along_axis : mygrad.Tensor
+ A tensor shaped as ``self`` but with the specified axis removed.
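+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1., 2.],
+ ... [3., 4.]])
+ >>> x.prod()
+ Tensor(24.)
+ >>> x.prod(axis=1)
+ Tensor([ 2., 12.])
+ """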
+ return Tensor._op(
+ Prod,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims},
+ constant=constant,
+ )
+
+ def cumprod(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Return the cumulative product of elements along a given axis.
+
+ This docstring was adapted from the official numpy documentation
+
+ Parameters
+ ----------
+ axis : Optional[int]
+ Axis along which the cumulative product is computed. By default
+ the input is flattened.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ mygrad.Tensor
+
+ Notes
+ -----
+ Arithmetic is modular when using integer types, and no error is
+ raised on overflow.
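+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([1., 2., 3., 4.])
+ >>> x.cumprod()
+ Tensor([ 1.,  2.,  6., 24.])
+ """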
+
+ return Tensor._op(CumProd, self, op_kwargs={"axis": axis}, constant=constant)
+
+ def cumsum(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Return the cumulative sum of the elements along a given axis.
+
+ This docstring was adapted from the official numpy documentation
+
+ Parameters
+ ----------
+ axis : int, optional
+ Axis along which the cumulative sum is computed. The default
+ (None) is to compute the cumsum over the flattened array.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ mygrad.Tensor
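+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([1., 2., 3.])
+ >>> x.cumsum()
+ Tensor([1., 3., 6.])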
+ """
+
+ return Tensor._op(CumSum, self, op_kwargs={"axis": axis}, constant=constant)
+
+ def mean(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Mean of tensor elements over a given axis.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which a mean is performed. The default,
+ axis=None, will mean all of the elements of the input tensor. If
+ axis is negative it counts from the last to the first axis.
+
+ If axis is a tuple of ints, a mean is performed on all of the axes
+ specified in the tuple instead of a single axis or all the axes as
+ before.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the input tensor.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ mean_along_axis : Tensor
+ A Tensor with the same shape as `self`, with the specified
+ axis/axes removed. If `self` is a 0-d tensor, or if `axis` is None,
+ a 0-dim Tensor is returned.
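+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1., 2.],
+ ... [3., 4.]])
+ >>> x.mean()
+ Tensor(2.5)
+ >>> x.mean(axis=0)
+ Tensor([2., 3.])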
+ """
+ return Tensor._op(
+ Mean,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims},
+ constant=constant,
+ )
+
+ def std(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ ddof: int = 0,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Compute the standard deviation along the specified axis.
+
+ Returns the standard deviation of the tensor's elements, a measure of the
+ spread of a distribution. The standard deviation is computed for the
+ flattened tensor by default, otherwise over the specified axis.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which the standard deviation is computed. The default
+ is to compute the standard deviation of the flattened tensor.
+
+ ddof : int, optional (default=0)
+ "Delta Degrees of Freedom": the divisor used in the calculation is
+ ``N - ddof``, where ``N`` represents the number of elements. By
+ default `ddof` is zero.
+
+ keepdims : bool, optional (default=False)
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the input array.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ std : mygrad.Tensor
+
+ Notes
+ -----
+ The standard deviation is the square root of the average of the squared
+ deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``.
+
+ The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
+ If, however, `ddof` is specified, the divisor ``N - ddof`` is used
+ instead. In standard statistical practice, ``ddof=1`` provides an
+ unbiased estimator of the variance of a hypothetical infinite population.
+ ``ddof=0`` provides a maximum likelihood estimate of the variance for
+ normally distributed variables.
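+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([1., 2., 3., 4.])
+ >>> x.std()
+ Tensor(1.118033988749895)
+ """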
+ return Tensor._op(
+ StdDev,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof},
+ constant=constant,
+ )
+
+ def var(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ ddof: int = 0,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Compute the variance along the specified axis.
+
+ Returns the variance of the array elements, a measure of the spread of a
+ distribution. The variance is computed for the flattened array by
+ default, otherwise over the specified axis.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which the variance is computed. The default is to
+ compute the variance of the flattened array.
+
+ ddof : int, optional (default=0)
+ "Delta Degrees of Freedom": the divisor used in the calculation is
+ ``N - ddof``, where ``N`` represents the number of elements. By
+ default `ddof` is zero.
+
+ keepdims : bool, optional (default=False)
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the input array..
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ variance : mygrad.Tensor
+
+ Notes
+ -----
+ The variance is the average of the squared deviations from the mean,
+ i.e., ``var = mean(abs(x - x.mean())**2)``.
+
+ The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
+ If, however, `ddof` is specified, the divisor ``N - ddof`` is used
+ instead. In standard statistical practice, ``ddof=1`` provides an
+ unbiased estimator of the variance of a hypothetical infinite population.
+ ``ddof=0`` provides a maximum likelihood estimate of the variance for
+ normally distributed variables.
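+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([1., 2., 3., 4.])
+ >>> x.var()
+ Tensor(1.25)
+ """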
+ return Tensor._op(
+ Variance,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof},
+ constant=constant,
+ )
+
+ def max(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Return the maximum of a tensor or maximum along its axes.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which to operate. By default, flattened input is used.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `arr`.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ max : mygrad.Tensor
+ Maximum of `a`. If `axis` is None, the result is a 0-D tensor.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> import numpy as np
+ >>> a = mg.arange(4).reshape((2,2))
+ >>> a
+ Tensor([[0, 1],
+ [2, 3]])
+ >>> a.max() # Maximum of the flattened array
+ Tensor(3)
+ >>> a.max(axis=0) # Maxima along the first axis
+ Tensor([2, 3])
+ >>> a.max(axis=1) # Maxima along the second axis
+ Tensor([1, 3])
+ >>> b = mg.arange(5, dtype=float)
+ >>> b[2] = np.nan
+ >>> b.max()
+ Tensor(nan)
+ """
+ return Tensor._op(
+ Max,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue},
+ constant=constant,
+ )
+
+ def min(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ keepdims: bool = False,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Return the minimum of a tensor or minimum along its axes.
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Axis or axes along which to operate. By default, flattened input is used.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `arr`.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ min : mygrad.Tensor
+ Minimum of `a`. If `axis` is None, the result is a 0-D tensor.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> import numpy as np
+ >>> a = mg.arange(4).reshape((2,2))
+ >>> a
+ Tensor([[0, 1],
+ [2, 3]])
+ >>> a.min() # Minimum of the flattened array
+ Tensor(0)
+ >>> a.min(axis=0) # Minima along the first axis
+ Tensor([0, 1])
+ >>> a.min(axis=1) # Minima along the second axis
+ Tensor([0, 2])
+ >>> b = mg.arange(5, dtype=float)
+ >>> b[2] = np.nan
+ >>> b.min()
+ Tensor(nan)
+ """
+ return Tensor._op(
+ Min,
+ self,
+ op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue},
+ constant=constant,
+ )
+
+ def swapaxes(
+ self, axis1: int, axis2: int, *, constant: Optional[bool] = None
+ ) -> "Tensor":
+ """Interchange two axes of a tensor.
+
+ Parameters
+ ----------
+ axis1 : int
+ First axis.
+
+ axis2 : int
+ Second axis.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ mygrad.Tensor
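+
+ Examples
+ --------
+ A minimal illustration; behavior mirrors ``numpy.swapaxes``:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1, 2, 3]])
+ >>> x.swapaxes(0, 1)
+ Tensor([[1],
+ [2],
+ [3]])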
+ """
+ return Tensor._op(SwapAxes, self, op_args=(axis1, axis2), constant=constant)
+
+ def transpose(
+ self: ArrayLike, *axes: int, constant: Optional[bool] = None
+ ) -> "Tensor":
+ """Permute the dimensions of a tensor.
+
+ Parameters
+ ----------
+ axes : int
+ By default, reverse the dimensions, otherwise permute the axes
+ according to the values given.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ mygrad.Tensor
+ `a` with its axes permuted. A new tensor is returned.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> a = mg.tensor([[1, 2], [3, 4]])
+ >>> a
+ Tensor([[1, 2],
+ [3, 4]])
+ >>> a.transpose()
+ Tensor([[1, 3],
+ [2, 4]])
+ >>> a.transpose((1, 0))
+ Tensor([[1, 3],
+ [2, 4]])
+ >>> a.transpose(1, 0)
+ Tensor([[1, 3],
+ [2, 4]])"""
+ if not axes:
+ axes = None
+ elif hasattr(axes[0], "__iter__") or axes[0] is None:
+ if len(axes) > 1:
+ raise TypeError(
+ f"'{type(axes[0])}' object cannot be interpreted as an integer"
+ )
+ axes = axes[0]
+ return Tensor._op(Transpose, self, op_args=(axes,), constant=constant)
+
+ def moveaxis(
+ self,
+ source: Union[int, Tuple[int, ...]],
+ destination: Union[int, Tuple[int, ...]],
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """Move axes of a tensor to new positions. Other axes remain in their
+ original order.
+
+
+ Parameters
+ ----------
+ source : Union[int, Sequence[int]]
+ Original positions of the axes to move. These must be unique.
+
+ destination : Union[int, Sequence[int]]
+ Destination positions for each of the original axes. These must also be
+ unique.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+ Returns
+ -------
+ result : mygrad.Tensor
+ Tensor with moved axes. This tensor is a view of the input tensor.
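+
+ Examples
+ --------
+ A minimal illustration; behavior mirrors ``numpy.moveaxis``:
+
+ >>> import mygrad as mg
+ >>> x = mg.zeros((3, 4, 5))
+ >>> x.moveaxis(0, -1).shape
+ (4, 5, 3)
+ >>> x.moveaxis(-1, 0).shape
+ (5, 3, 4)
+ """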
+ return Tensor._op(
+ MoveAxis, self, op_args=(source, destination), constant=constant
+ )
+
+ def squeeze(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor":
+ """
+ Remove single-dimensional entries from the shape of a tensor.
+
+ This docstring was adapted from ``numpy.squeeze``
+
+ Parameters
+ ----------
+ axis : Optional[Union[int, Tuple[int, ...]]]
+ Selects a subset of the single-dimensional entries in the
+ shape. If an axis is selected with shape entry greater than
+ one, an error is raised.
+
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+
+ Returns
+ -------
+ mygrad.Tensor
+
+ Raises
+ ------
+ ValueError
+ If ``axis`` is not ``None``, and an axis being squeezed is not of length 1
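+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.zeros((1, 3, 1))
+ >>> x.squeeze().shape
+ (3,)
+ >>> x.squeeze(axis=0).shape
+ (3, 1)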
+ """
+ return Tensor._op(Squeeze, self, op_args=(axis,), constant=constant)
+
+ def ravel(self, *, constant: Optional[bool] = None) -> "Tensor":
+ """
+ Flattens contents of a tensor into a contiguous 1-D array. A copy is made only if needed.
+
+ This docstring was adapted from ``numpy.ravel``.
+
+ Parameters
+ ----------
+ constant : Optional[bool]
+ If ``True``, this tensor is treated as a constant, and thus does not
+ facilitate back propagation (i.e. ``constant.grad`` will always return
+ ``None``).
+
+ Defaults to ``False`` for float-type data.
+ Defaults to ``True`` for integer-type data.
+
+ Integer-type tensors must be constant.
+
+
+ Returns
+ -------
+ mygrad.Tensor
+
+ Notes
+ -----
+ ``ravel`` utilizes C-ordering, meaning that it reads & writes elements using
+ C-like index ordering; the last axis index changing fastest, and, proceeding
+ in reverse order, the first axis index changing slowest.
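+
+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1, 2],
+ ... [3, 4]])
+ >>> x.ravel()
+ Tensor([1, 2, 3, 4])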
+ """
+ return Tensor._op(Ravel, self, constant=constant)
+
+ def argmax(
+ self, axis: Optional[int] = None, out: Optional[np.ndarray] = None
+ ) -> np.ndarray:
+ """Returns the indices of the maximum values along an axis.
+
+ Parameters
+ ----------
+ axis: int, optional
+ By default, the index is into the flattened array, otherwise along the specified axis.
+
+ out: numpy.ndarray, optional
+ If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype.
+
+ Returns
+ -------
+ numpy.ndarray[int]
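+
+ Examples
+ --------
+ A minimal illustration; the result is a plain numpy array:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1, 5],
+ ... [3, 2]])
+ >>> x.argmax(axis=1)
+ array([1, 0])
+ """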
+
+ return np.argmax(self.data, axis, out)
+
+ def argmin(
+ self, axis: Optional[int] = None, out: Optional[np.ndarray] = None
+ ) -> np.ndarray:
+ """Returns the indices of the minimum values along an axis.
+
+ Parameters
+ ----------
+ axis: int, optional
+ By default, the index is into the flattened array, otherwise along the specified axis.
+
+ out: numpy.ndarray, optional
+ If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype.
+
+ Returns
+ -------
+ numpy.ndarray[int]
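+
+ Examples
+ --------
+ A minimal illustration; the result is a plain numpy array:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[1, 5],
+ ... [3, 2]])
+ >>> x.argmin(axis=1)
+ array([0, 1])
+ """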
+
+ return np.argmin(self.data, axis, out)
+
+ def any(
+ self,
+ axis: Optional[Union[int, Tuple[int, ...]]] = None,
+ out: Optional[np.ndarray] = None,
+ keepdims: bool = False,
+ ) -> np.ndarray:
+ """Test whether any array or Tensor element along a given axis evaluates to True.
+
+ Returns single boolean if `axis` is ``None``
+
+ This documentation was adapted from ``numpy.any``
+
+ Parameters
+ ----------
+ axis : None or int or tuple of ints, optional
+ Axis or axes along which a logical OR reduction is performed.
+ The default (``axis=None``) is to perform a logical OR over all
+ the dimensions of the input array. `axis` may be negative, in
+ which case it counts from the last to the first axis.
+ If this is a tuple of ints, a reduction is performed on multiple
+ axes, instead of a single axis or all the axes as before.
+
+ out : ndarray, optional
+ Alternate output array in which to place the result. It must have
+ the same shape as the expected output and its type is preserved
+ (e.g., if it is of type float, then it will remain so, returning
+ 1.0 for True and 0.0 for False, regardless of the type of `a`).
+ See `ufuncs-output-type` for more details.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the input array.
+ If the default value is passed, then `keepdims` will not be
+ passed through to the `any` method of sub-classes of
+ `ndarray`, however any non-default value will be. If the
+ sub-class' method does not implement `keepdims` any
+ exceptions will be raised.
+
+ Returns
+ -------
+ any : bool or ndarray
+ A new boolean or `ndarray` is returned unless `out` is specified,
+ in which case a reference to `out` is returned.
+
+ See Also
+ --------
+ numpy.any : equivalent function
+
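+ Examples
+ --------
+ A minimal illustration:
+
+ >>> import mygrad as mg
+ >>> x = mg.tensor([[True, False],
+ ... [False, False]])
+ >>> x.any(axis=0)
+ array([ True, False])
+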
+ """
+ return np.any(self.data, axis=axis, out=out, keepdims=keepdims)
+
+ def clip(
+ self,
+ a_min: ArrayLike,
+ a_max: ArrayLike,
+ out: Optional[Union[np.ndarray, "Tensor"]] = None,
+ *,
+ constant: Optional[bool] = None,
+ ) -> "Tensor": # pragma: no cover
+ """Clip (limit) the values in an array.
+
+ Given an interval, values outside the interval are clipped to
+ the interval edges. For example, if an interval of ``[0, 1]``
+ is specified, values smaller than 0 become 0, and values larger
+ than 1 become 1.
+
+ Equivalent to ``mg.minimum(a_max, mg.maximum(a, a_min))``.
+
+ No check is performed to ensure ``a_min < a_max``.
+
+ This docstring was adapted from that of `numpy.clip`
+
+ Parameters
+ ----------
+ a_min : Optional[float, ArrayLike]
+ Minimum value. If `None`, clipping is not performed on lower
+ interval edge. Not more than one of `a_min` and `a_max` may be
+ `None`.
+
+ a_max : Optional[float, ArrayLike]
+ Maximum value. If `None`, clipping is not performed on upper
+ interval edge. Not more than one of `a_min` and `a_max` may be
+ `None`. If `a_min` or `a_max` are ArrayLike, then the three
+ arrays will be broadcasted to match their shapes.
+
+ out : Optional[Union[ndarray, Tensor]]
+ A location into which the result is stored. If provided, it must have
+ a shape that the inputs broadcast to. If not provided or None, a
+ freshly-allocated tensor is returned.
+
+ constant : bool, optional(default=False)
+ If ``True``, the returned tensor is a constant (it
+ does not backpropagate a gradient)
+
+ Returns
+ -------
+ Tensor
+ A tensor with the elements of `a`, but where values
+ < `a_min` are replaced with `a_min`, and those > `a_max`
+ with `a_max`.
+
+ Examples
+ --------
+ >>> import mygrad as mg
+ >>> a = mg.arange(10)
+ >>> a
+ Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ >>> a.clip(1, 8)
+ Tensor([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
+ >>> a.clip([3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
+ Tensor([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])"""
+ # implementation is set in mygrad.__init__
+ ...
+
+
+