Skip to content

Commit

Permalink
Merge pull request #286 from FAIRmat-NFDI/strict-string-check
Browse files Browse the repository at this point in the history
Modify byte decoding
  • Loading branch information
lukaspie authored Sep 13, 2024
2 parents b90d494 + c10e52a commit cf56332
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 3 deletions.
38 changes: 38 additions & 0 deletions dev_tools/tests/test_nxdl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,41 @@ def test_namefitting_precedence(better_fit, better_ref, worse_fit, worse_ref):
assert nexus.get_nx_namefit(better_fit, better_ref) > nexus.get_nx_namefit(
worse_fit, worse_ref
)


@pytest.mark.parametrize(
    "string_obj, decode, expected",
    [
        # Test with lists of bytes and strings
        ([b"bytes", "string"], True, ["bytes", "string"]),
        ([b"bytes", "string"], False, [b"bytes", "string"]),
        ([b"bytes", b"more_bytes", "string"], True, ["bytes", "more_bytes", "string"]),
        (
            [b"bytes", b"more_bytes", "string"],
            False,
            [b"bytes", b"more_bytes", "string"],
        ),
        ([b"fixed", b"length", b"strings"], True, ["fixed", "length", "strings"]),
        ([b"fixed", b"length", b"strings"], False, [b"fixed", b"length", b"strings"]),
        # Test with nested lists
        ([[b"nested1"], [b"nested2"]], True, [["nested1"], ["nested2"]]),
        ([[b"nested1"], [b"nested2"]], False, [[b"nested1"], [b"nested2"]]),
        # Test with bytes
        (b"single", True, "single"),
        (b"single", False, b"single"),
        # Test with str
        ("single", True, "single"),
        ("single", False, "single"),
        # Test with int
        (123, True, 123),
        (123, False, 123),
    ],
)
def test_decode_or_not(string_obj, decode, expected):
    """Check that `decode_or_not` returns `expected` for each input/flag pair.

    Each parametrized case supplies an input object, the `decode` flag, and
    the value the call is expected to return.
    """
    result = nexus.decode_or_not(elem=string_obj, decode=decode)

    # A list input must come back as a list; checked separately so a type
    # mismatch produces a clearer message than a bare equality failure.
    if isinstance(expected, list):
        assert isinstance(result, list), f"Expected list, but got {type(result)}"

    # Compare the actual contents in every case (lists included), so that
    # wrongly decoded or undecoded elements are detected.
    assert result == expected, f"Failed for {string_obj} with decode={decode}"
36 changes: 33 additions & 3 deletions dev_tools/utils/nxdl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,40 @@
from lxml.etree import ParseError as xmlER


def decode_or_not(elem, encoding: str = "utf-8", decode: bool = True):
    """
    Decodes a byte array to a string if necessary. All other types are returned untouched.

    Lists are processed recursively: every item (including nested lists) is
    passed through this function and a new list is returned.
    If `decode` is False, the initial value is returned without decoding,
    including for byte arrays.

    Args:
        elem: Any Python object that may need decoding.
        encoding: The encoding scheme to use. Default is "utf-8".
        decode: A boolean flag indicating whether to perform decoding.

    Returns:
        A decoded string (in case of a byte string), a list of decoded items
        (in case of a list), or the initial value.
        If `decode` is False, always returns the initial value.

    Raises:
        ValueError: If a byte string cannot be decoded using the provided encoding.
    """
    if not decode:
        return elem

    # Handle lists of bytes or strings (possibly nested)
    if isinstance(elem, list):
        if not elem:
            return elem  # Return an empty list unchanged

        return [decode_or_not(item, encoding, decode) for item in elem]

    if isinstance(elem, bytes):
        try:
            return elem.decode(encoding)
        except UnicodeDecodeError as e:
            # Chain the original error so the failing byte offset is not lost.
            raise ValueError(f"Error decoding bytes: {e}") from e

    # Anything that is neither a list nor bytes is passed through as-is.
    return elem


Expand Down

0 comments on commit cf56332

Please sign in to comment.