Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Added support for tuples, namedtuples (both from collections and typing), sets, frozensets and OrderedDict's in MSONable, MontyEncoder and MontyDecoder #100

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
0755e76
Added is_namedtuple method to test whether an object is a namedtuple …
davidwaroquiers Jan 14, 2020
f259b94
Merge branch 'master' of https://github.com/materialsvirtuallab/monty
davidwaroquiers Jan 14, 2020
92246a7
Added (partial) support for tuple and namedtuples in json.py.
davidwaroquiers Jan 14, 2020
680461d
Added support for tuple, namedtuples and OrderedDict in json.py.
davidwaroquiers Jan 14, 2020
9a57714
Fixed namedtuple support for python versions < 3.7, for which default
davidwaroquiers Jan 14, 2020
41f4463
Disable pylint's check on unexpected keyword argument 'defaults'.
davidwaroquiers Jan 14, 2020
d5527b9
Disable pylint's check on unexpected keyword argument 'defaults'.
davidwaroquiers Jan 14, 2020
3f3c029
Added is_NamedTuple to check whether object is a class generated from…
davidwaroquiers Jan 15, 2020
6866658
Added validate_NamedTuple to check whether the items in a NamedTuple …
davidwaroquiers Jan 16, 2020
b67e6e6
Changed validate_NamedTuple to accept subclass of the types as valid …
davidwaroquiers Jan 16, 2020
e233d4c
Changed validate_NamedTuple to accept subclass of the types as valid …
davidwaroquiers Jan 16, 2020
465467d
Ignoring pytest import for mypy.
davidwaroquiers Jan 16, 2020
3ed73cc
Changed "builtins" to "@builtins" in the serialization of tuples, nam…
davidwaroquiers Jan 16, 2020
7e823a9
Added serialization of typing.NamedTuple's.
davidwaroquiers Jan 17, 2020
d982c3b
Added serialization of sets.
davidwaroquiers Jan 17, 2020
04e0513
Fixed pycodestyle, pylint, mypy, ...
davidwaroquiers Jan 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions monty/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,27 @@ def dict2namedtuple(*args, **kwargs):
d.update(**kwargs)
return collections.namedtuple(typename="dict2namedtuple",
field_names=list(d.keys()))(**d)


def _is_fielded_tuple(obj):
    """Return True if obj is a tuple instance exposing ``_fields`` and ``_asdict``."""
    return (isinstance(obj, tuple) and hasattr(obj, "_fields") and
            hasattr(obj, "_asdict"))


def _has_typed_fields(obj):
    """Return True if the class of obj carries per-field type information.

    typing.NamedTuple exposed ``_field_types`` up to Python 3.8; that
    attribute was removed in Python 3.9, where only the class-level
    ``__annotations__`` remain. Both spellings are therefore checked.
    """
    if hasattr(obj, "_field_types"):
        return True
    # Classes built by collections.namedtuple have no (or empty) annotations.
    return bool(getattr(type(obj), "__annotations__", None))


def is_namedtuple(obj):
    """Test if an object is an instance of a class made by collections.namedtuple.

    Args:
        obj: Any object.

    Returns:
        bool: True for tuple instances that expose the namedtuple API
        (``_fields``, ``_asdict``) and carry no field type information.
        Instances of typing.NamedTuple classes give False (a
        typing.NamedTuple without any field is indistinguishable from a
        collections.namedtuple on Python >= 3.9 and gives True).
    """
    return _is_fielded_tuple(obj) and not _has_typed_fields(obj)


def is_NamedTuple(obj):
    """Test if an object is an instance of a class made by typing.NamedTuple.

    Args:
        obj: Any object.

    Returns:
        bool: True for tuple instances that expose the namedtuple API
        (``_fields``, ``_asdict``) and carry field type information.
    """
    return _is_fielded_tuple(obj) and _has_typed_fields(obj)


def validate_NamedTuple(obj):
    """Validate whether the items in the NamedTuple have the declared types.

    Each value is checked with ``isinstance`` so subclasses of the declared
    type are accepted. Annotations that ``isinstance`` does not accept as its
    second argument (for example subscripted typing generics) make
    ``isinstance`` raise TypeError; that error is not handled here.

    Args:
        obj: Instance of a class generated from typing.NamedTuple.

    Returns:
        bool: True if every field value is an instance of its declared type,
        False as soon as one is not.

    Raises:
        ValueError: If obj is not a typing.NamedTuple instance.
    """
    if not is_NamedTuple(obj):
        raise ValueError('Cannot validate object of type "{}".'.format(obj.__class__.__name__))
    # Prefer the legacy attribute when it exists (Python <= 3.8), otherwise
    # fall back to the class annotations (the only source on Python >= 3.9).
    field_types = getattr(obj, "_field_types", None)
    if field_types is None:
        field_types = getattr(type(obj), "__annotations__", {})
    return all(isinstance(getattr(obj, field), field_type)
               for field, field_type in field_types.items())
161 changes: 151 additions & 10 deletions monty/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
import os
import json
import datetime
import sys

from hashlib import sha1
from collections import OrderedDict, defaultdict
from collections import namedtuple
from enum import Enum
from typing import NamedTuple

from importlib import import_module

Expand All @@ -33,6 +36,10 @@
except ImportError:
yaml = None # type: ignore

from monty.collections import is_namedtuple
from monty.collections import is_NamedTuple


__version__ = "3.0.0"


Expand Down Expand Up @@ -62,6 +69,108 @@ def _load_redirect(redirect_file):
return dict(redirect_dict)


# Private lookup tables used when (de)serializing the field types of
# typing.NamedTuple classes: builtin type -> name, and name -> builtin type.
_typ2name = dict(
    (builtin, builtin.__name__)
    for builtin in (
        bool, int, float, complex,
        list, tuple, range,
        str,
        bytes, bytearray, memoryview,
        set, frozenset,
        dict,
    )
)
# Reverse table; NoneType is only reachable through type(None), so it is
# registered by name here and not listed in _typ2name.
_name2typ = dict(
    ((name, builtin) for builtin, name in _typ2name.items()),
    NoneType=type(None),
)


def _serialize_type(typ):
    """Describe a type as a plain dict tagged "@builtins" / "@types".

    Three cases are distinguished, in this order:
      * types listed in ``_typ2name`` are stored under their builtin name,
      * ``None`` and ``type(None)`` are both stored as "NoneType",
      * anything else is stored by its ``__module__`` and ``__name__``.
    """
    described = {"@module": "@builtins", "@class": "@types"}
    if typ in _typ2name:
        described["type"] = _typ2name[typ]
    elif typ is None or typ is type(None):
        described["type"] = "NoneType"
    else:
        described.update([("type", "@class"),
                          ("type_module", typ.__module__),
                          ("type_class", typ.__name__)])
    return described


def _deserialize_type(d):
    """Rebuild a type from the dict produced by ``_serialize_type``.

    Args:
        d: Dict with a "type" key. When "type" is "@class", the keys
            "type_module" and "type_class" name the class to import.

    Returns:
        The type found in ``_name2typ`` for d["type"], or the class imported
        from the (possibly redirected) module.

    Raises:
        ValueError: If d["type"] is neither a known name nor "@class", or if
            the module does not define the requested class.
    """
    type_name = d["type"]
    if type_name in _name2typ:
        return _name2typ[type_name]
    if type_name != "@class":
        raise ValueError('Could not deserialize type.')

    modname = d["type_module"]
    classname = d["type_class"]
    # Fetch the redirect entry once, keyed by the ORIGINAL module and class
    # names. Rebinding modname first and then indexing REDIRECT with it again
    # would look the "@class" target up under the redirected module name.
    redirect = MSONable.REDIRECT.get(modname, {}).get(classname)
    if redirect is not None:
        modname, classname = redirect["@module"], redirect["@class"]

    mod = __import__(modname, globals(), locals(), [classname], 0)
    try:
        return getattr(mod, classname)
    except AttributeError as exc:
        raise ValueError('Could not deserialize type.') from exc


def _recursive_as_dict(obj):
    """Recursively turn an object into structures the JSON encoder can handle.

    collections.namedtuple and typing.NamedTuple instances, sets, tuples and
    OrderedDicts are replaced by dicts tagged with "@module": "@builtins" and
    a "@class" entry. Lists and dicts are traversed, objects exposing
    ``as_dict`` are replaced by the result of that call (which is not
    traversed further), and anything else is returned unchanged.

    Args:
        obj: Object to prepare for serialization.

    Returns:
        The converted object.
    """
    # The order of the checks matters: namedtuples and NamedTuples are
    # subclasses of tuple, and OrderedDict is a subclass of dict, so the more
    # specific cases have to be tested first.
    if is_namedtuple(obj):
        d = {"namedtuple_as_list": [_recursive_as_dict(it) for it in obj],
             "fields": obj._fields,
             "typename": obj.__class__.__name__,
             "@module": "@builtins",
             "@class": "collections.namedtuple"}
        # Defaults exist for collections.namedtuple since Python 3.7. The
        # documented attribute is _field_defaults; _fields_defaults is the
        # misspelled variant, kept as a fallback. Probing the attributes also
        # replaces the explicit interpreter version check.
        defaults = getattr(obj, "_field_defaults", None)
        if defaults is None:
            defaults = getattr(obj, "_fields_defaults", None)
        if defaults is not None:
            # Stored as the list of default VALUES (in field order): this is
            # what namedtuple(..., defaults=...) expects on decoding. Passing
            # the mapping itself would make its keys (the field names) the
            # defaults.
            d["fields_defaults"] = list(defaults.values())
        return d
    if is_NamedTuple(obj):
        # _field_types was removed from typing.NamedTuple in Python 3.9; the
        # class annotations hold the same mapping.
        field_types = getattr(obj, "_field_types", None)
        if field_types is None:
            field_types = type(obj).__annotations__
        d = {"NamedTuple_as_list": [_recursive_as_dict(it) for it in obj],
             "fields": obj._fields,
             "fields_types": [_serialize_type(field_types[field]) for field in obj._fields],
             "typename": obj.__class__.__name__,
             "doc": obj.__doc__,
             "@module": "@builtins",
             "@class": "typing.NamedTuple"}
        # Only the attribute lookup is guarded, so an AttributeError raised
        # while converting a default value is no longer silently swallowed.
        field_defaults = getattr(obj, "_field_defaults", None) or {}
        d["fields_defaults"] = [(field, _recursive_as_dict(field_default))
                                for field, field_default in field_defaults.items()]
        return d
    if isinstance(obj, set):
        return {"set_as_list": [_recursive_as_dict(it) for it in obj],
                "@module": "@builtins",
                "@class": "set"}
    if isinstance(obj, tuple):
        return {"tuple_as_list": [_recursive_as_dict(it) for it in obj],
                "@module": "@builtins",
                "@class": "tuple"}
    if isinstance(obj, OrderedDict):
        return {"ordereddict_as_list": [[key, _recursive_as_dict(val)] for key, val in obj.items()],
                "@module": "@builtins",
                "@class": "OrderedDict"}
    if isinstance(obj, list):
        return [_recursive_as_dict(it) for it in obj]
    if isinstance(obj, dict):
        return {kk: _recursive_as_dict(vv) for kk, vv in obj.items()}
    if hasattr(obj, "as_dict"):
        return obj.as_dict()
    return obj


class MSONable:
"""
This is a mix-in base class specifying an API for msonable objects. MSON
Expand Down Expand Up @@ -122,15 +231,6 @@ def as_dict(self) -> dict:

args = getfullargspec(self.__class__.__init__).args

def recursive_as_dict(obj):
if isinstance(obj, (list, tuple)):
return [recursive_as_dict(it) for it in obj]
if isinstance(obj, dict):
return {kk: recursive_as_dict(vv) for kk, vv in obj.items()}
if hasattr(obj, "as_dict"):
return obj.as_dict()
return obj

for c in args:
if c != "self":
try:
Expand All @@ -147,7 +247,7 @@ def recursive_as_dict(obj):
"a self.kwargs variable to automatically "
"determine the dict format. Alternatively, "
"you can implement both as_dict and from_dict.")
d[c] = recursive_as_dict(a)
d[c] = _recursive_as_dict(a)
if hasattr(self, "kwargs"):
# type: ignore
d.update(**getattr(self, "kwargs")) # pylint: disable=E1101
Expand Down Expand Up @@ -251,6 +351,8 @@ def default(self, o) -> dict: # pylint: disable=E0202
"@class": "ObjectId",
"oid": str(o)}

# Is this still useful as we are now calling the _recursive_as_dict
# method (which takes care of as_dict's) before the encoding ?
try:
d = o.as_dict()
if "@module" not in d:
Expand All @@ -268,6 +370,17 @@ def default(self, o) -> dict: # pylint: disable=E0202
except AttributeError:
return json.JSONEncoder.default(self, o)

def encode(self, o):
    """MontyEncoder's encode method.

    The object is prepared by ``iterencode`` (which the base class ``encode``
    calls for everything but plain strings): tuples, namedtuples, objects
    having an as_dict method and others are recursively transformed to
    encodable python objects before the actual encoding.

    Args:
        o: Object to encode.

    Returns:
        str: JSON representation of the object.
    """
    return super().encode(o)

def iterencode(self, o, _one_shot=False):
    """Prepare the object with ``_recursive_as_dict`` and encode it in chunks.

    The preparation cannot go in the default method because default is called
    as a last resort, such that tuples and namedtuples have already been
    transformed to lists by json's own encoding. It is hooked here rather
    than in ``encode`` only, because ``json.dump`` calls ``iterencode``
    directly and would otherwise skip the preparation.

    Args:
        o: Object to encode.
        _one_shot: Forwarded unchanged to ``json.JSONEncoder.iterencode``.

    Returns:
        Iterator over the string chunks of the JSON representation.
    """
    return super().iterencode(_recursive_as_dict(o), _one_shot=_one_shot)


class MontyDecoder(json.JSONDecoder):
"""
Expand Down Expand Up @@ -308,6 +421,34 @@ def process_decoded(self, d):
dt = datetime.datetime.strptime(d["string"],
"%Y-%m-%d %H:%M:%S")
return dt
if modname == "@builtins":
if classname == "tuple":
return tuple([self.process_decoded(item) for item in d['tuple_as_list']])
if classname == "set":
return {self.process_decoded(item) for item in d['set_as_list']}
if classname == "collections.namedtuple":
# default values for collections.namedtuple have been introduced in python 3.7
# it is probably not essential to deserialize the defaults if the object was serialized with
# python >= 3.7 and deserialized with python < 3.7.
if sys.version_info < (3, 7):
nt = namedtuple(d['typename'], d['fields'])
else:
nt = namedtuple(d['typename'], d['fields'], # pylint: disable=E1123
defaults=d['fields_defaults']) # pylint: disable=E1123
return nt(*[self.process_decoded(item) for item in d['namedtuple_as_list']])
if classname == "typing.NamedTuple":
NT = NamedTuple(d['typename'], [(field, _deserialize_type(field_type))
for field, field_type in zip(d['fields'], d['fields_types'])])
NT.__doc__ = d['doc']
# default values for typing.NamedTuple have been introduced in python 3.6
if sys.version_info >= (3, 6):
NT._field_defaults = OrderedDict([(field, self.process_decoded(default))
for field, default in d['fields_defaults']])
return NT(*[self.process_decoded(item) # pylint: disable=E1102
for item in d['NamedTuple_as_list']]) # pylint: disable=E1102
if classname == "OrderedDict":
return OrderedDict([(key, self.process_decoded(val))
for key, val in d['ordereddict_as_list']])

mod = __import__(modname, globals(), locals(), [classname], 0)
if hasattr(mod, classname):
Expand Down
71 changes: 71 additions & 0 deletions tests/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import unittest
import os
import sys

from collections import namedtuple
from typing import NamedTuple
import pytest # type: ignore # Ignore pytest import for mypy

from monty.collections import frozendict, Namespace, AttrDict, \
FrozenAttrDict, tree
from monty.collections import is_namedtuple
from monty.collections import is_NamedTuple
from monty.collections import validate_NamedTuple

test_dir = os.path.join(os.path.dirname(__file__), 'test_files')

Expand Down Expand Up @@ -49,5 +57,68 @@ def test_tree(self):
self.assertEqual(x['a']['b']['c']['d'], 1)


def test_is_namedtuple():
# Testing collections.namedtuple
a_nt = namedtuple('a', ['x', 'y', 'z'])
a1 = a_nt(1, 2, 3)
assert a1 == (1, 2, 3)
assert is_namedtuple(a1) is True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need "is True": the assert already tests whether the value is truthy.
Similarly, three lines later you just need "assert not is_namedtuple(...)".

Copy link
Contributor Author

@davidwaroquiers davidwaroquiers Jan 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is actually not exactly equivalent. The "is True" also checks that the returned value is a bool and not for example an empty list vs a non empty list. If is_namedtuple(a1) were to return, e.g. ["hello"] (or to return [] in the "False" case), "assert is_namedtuple(a1)" would also pass while the API would have changed. I would keep the "is True" if that's ok for you. Same for 3 lines after (and also in quite a lot of other places).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I know it is not exactly equivalent, but we need to think of typical use cases. When people use that method, they are not going to check that it is a boolean type. They will merely check that it is a "True-like" value. E.g., if is_namedtuple(a1). That's the Pythonic way of doing this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, users will use it this way. My point is that this method is supposed (from its docstring/name) to return a bool indicating whether it is a namedtuple or not. Why should we allow any new development/change to return something else (e.g. the string "Yes it is a namedtuple" for the True case and "" for the False case) ? This unit test is just preventing that. Anyway if you prefer to remove the "is True", I can do it. For me the pythonic way applies for the code itself, but the test code can deviate from that for the reason above.

assert is_NamedTuple(a1) is False
with pytest.raises(ValueError, match=r'Cannot validate object of type "a"\.'):
validate_NamedTuple(a1)
a_t = tuple([1, 2])
assert a_t == (1, 2)
assert is_namedtuple(a_t) is False
assert is_NamedTuple(a_t) is False
with pytest.raises(ValueError, match=r'Cannot validate object of type "tuple"\.'):
validate_NamedTuple(a_t)

class SubList(list):
    # Negative test case: a list subclass that defines the namedtuple-like
    # names _fields, _fields_defaults and _asdict (as methods) without being
    # a tuple. The assertions that follow expect is_namedtuple and
    # is_NamedTuple to return False for it.
    def _fields(self):
        return []
    def _fields_defaults(self):
        return []
    def _asdict(self):
        return {}

sublist = SubList([3, 2, 1])
assert sublist == [3, 2, 1]
assert is_namedtuple(sublist) is False
assert is_NamedTuple(sublist) is False
with pytest.raises(ValueError, match=r'Cannot validate object of type "SubList"\.'):
validate_NamedTuple(sublist)

# Testing typing.NamedTuple
A = NamedTuple('A', [('int1', int), ('str1', str)])
nt = A(3, 'b')
assert is_NamedTuple(nt) is True
assert is_namedtuple(nt) is False
assert validate_NamedTuple(nt) is True
nt = A(3, 2)
assert validate_NamedTuple(nt) is False
nt = A('a', 'b')
assert validate_NamedTuple(nt) is False

# Testing typing.NamedTuple with type annotations (for python >= 3.6)
# This will not work for python < 3.6, leading to a SyntaxError hence the
# exec here.
try:
exec('class B(NamedTuple):\n\
int1: int = 1\n\
str1: str = \'a\'\n\
global B') # Make the B class available globally
nt = B(2, 'hello')
assert is_NamedTuple(nt) is True
assert is_namedtuple(nt) is False
assert validate_NamedTuple(nt) is True
nt = B('a', 'b')
assert validate_NamedTuple(nt) is False
nt = B(3, 4)
assert validate_NamedTuple(nt) is False
except SyntaxError:
# Make sure we get this SyntaxError only in the case of python < 3.6.
assert sys.version_info < (3, 6)


if __name__ == "__main__":
    # Run the tests of this module when it is executed as a script.
    # NOTE(review): unittest.main() collects unittest.TestCase classes;
    # test_is_namedtuple looks like a module-level pytest-style function
    # (bare asserts, pytest.raises), so it presumably only runs under
    # pytest - confirm.
    unittest.main()
Loading