Skip to content

Commit

Permalink
support "T" and "V" dtypes in from_dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
tybug committed Jan 3, 2025
1 parent 8250483 commit 10ef133
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 1 deletion.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: minor

:func:`~hypothesis.extra.numpy.from_dtype` now supports the :obj:`numpy.dtypes.VoidDType` (``"V"``) dtype, as well as the new :obj:`numpy:numpy.dtypes.StringDType` (``"T"``) dtype in NumPy 2.0.
40 changes: 39 additions & 1 deletion hypothesis-python/src/hypothesis/extra/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,15 @@ def _try_import(mod_name: str, attr_name: str) -> Any:

# Unit codes, ordered from coarsest ("Y") to finest ("as").
TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split())

# (major, minor) of the installed NumPy, parsed once so that every version
# gate in this module compares against the same tuple.
numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
# See https://github.com/HypothesisWorks/hypothesis/pull/3394 and linked discussion.
NP_FIXED_UNICODE = numpy_version >= (1, 19)

# StringDType is only imported when running on NumPy 2.0 or later. On older
# versions the list stays empty, so it can be unpacked into a larger
# collection of strategies without any further version checks.
if numpy_version < (2, 0):
    maybe_string_dtype = []
else:
    from numpy.dtypes import StringDType

    maybe_string_dtype = [st.just(StringDType())]


@defines_strategy(force_reusable_values=True)
Expand Down Expand Up @@ -213,6 +220,17 @@ def compat_kw(*args, **kw):
else: # NEP-7 defines the NaT value as integer -(2**63)
elems = st.integers(-(2**63) + 1, 2**63 - 1)
result = st.builds(dtype.type, elems, res)
elif dtype.kind == "T":
result = st.text(**compat_kw("alphabet", "min_size", "max_size"))
elif dtype.kind == "V":
result = st.binary(
**compat_kw(
"min_size", max_size=None if dtype.itemsize == 0 else dtype.itemsize
)
)
# we explicitly avoid supporting dtype.kind == "O", because it is easy to
# OOM when evaluating e.g. np.array(range(0, n)) for large n (and this is in
# fact a thing hypothesis will generate via st.from_type(object)).
else:
raise InvalidArgument(f"No strategy inference for {dtype}")
return result.map(dtype.type)
Expand Down Expand Up @@ -927,6 +945,9 @@ def timedelta64_dtypes(
)


# TODO: we should uncap max_len here, and for unicode/void below.
# Also allow generating undetermined-width dtypes like "S" / "S0"? Possibly with
# a new parameter allow_undetermined?
@defines_dtype_strategy
def byte_string_dtypes(
*, endianness: str = "?", min_len: int = 1, max_len: int = 16
Expand Down Expand Up @@ -957,6 +978,21 @@ def unicode_string_dtypes(
return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness)


@defines_dtype_strategy
def void_dtypes(
    *, endianness: str = "?", min_len: int = 1, max_len: int = 16
) -> st.SearchStrategy["np.dtype[np.void]"]:
    """Return a strategy for void dtypes of varying length and byteorder.

    Generated lengths run from ``min_len`` to ``max_len``, both inclusive.

    Although Hypothesis' st.binary strategy can generate empty bytestrings,
    a void dtype of length 0 means that its size is still to be determined,
    so the minimum length for void dtypes is 1.
    """
    order_check("len", 1, min_len, max_len)
    candidate_lengths = list(range(min_len, max_len + 1))
    return dtype_factory("V", candidate_lengths, None, endianness)


def _no_title_is_name_of_a_titled_field(ls):
seen = set()
for title_and_name, *_ in ls:
Expand Down Expand Up @@ -1336,6 +1372,8 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]:
# Note: Parameterized dtypes and DTypeLike are not supported.
return st.one_of(
scalar_dtypes(),
void_dtypes(),
*maybe_string_dtype,
byte_string_dtypes(),
unicode_string_dtypes(),
array_dtypes(),
Expand Down
20 changes: 20 additions & 0 deletions hypothesis-python/tests/numpy/test_from_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"complex128",
"datetime64",
"timedelta64",
"void",
bool,
str,
bytes,
Expand Down Expand Up @@ -103,6 +104,14 @@ def test_unicode_string_dtypes_generate_unicode_strings(data):
assert isinstance(result, str)


@given(st.data())
def test_void_dtype_generates_void(data):
    """Values drawn via from_dtype for a void dtype are np.void scalars."""
    void_dtype = data.draw(nps.void_dtypes())
    drawn = data.draw(nps.from_dtype(void_dtype))
    assert isinstance(drawn, np.void)
    # The scalar must also expose its contents as a plain bytes object.
    assert isinstance(drawn.tobytes(), bytes)


@given(nps.arrays(dtype="U99", shape=(10,)))
def test_can_unicode_strings_without_decode_error(arr):
# See https://github.com/numpy/numpy/issues/15363
Expand All @@ -129,6 +138,7 @@ def test_byte_string_dtypes_generate_unicode_strings(data):


# Marker for cases that need something only present from NumPy 2.0 onwards:
# skipped when running against an older NumPy.
skipif_np1 = pytest.mark.skipif(
    np_version < (2, 0),
    reason="added in new version",
)
# Marker for cases that rely on something NumPy 2.0 removed: skipped when
# running against NumPy 2.0 or later.
skipif_np2 = pytest.mark.skipif(
    np_version >= (2, 0),
    reason="removed in new version",
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -251,6 +261,16 @@ def test_arrays_gives_useful_error_on_inconsistent_time_unit():
("U", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
("U4", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
("U", {"alphabet": "abc"}, lambda x: set(x).issubset("abc")),
pytest.param(
"T", {"alphabet": "abc"}, lambda x: set(x).issubset("abc"), marks=skipif_np1
),
pytest.param(
"T",
{"min_size": 1, "max_size": 2},
lambda x: 1 <= len(x) <= 2,
marks=skipif_np1,
),
("V", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x.tobytes()) <= 2),
],
)
@given(data=st.data())
Expand Down

0 comments on commit 10ef133

Please sign in to comment.