Skip to content

Commit

Permalink
Merge pull request #401 from radical-cybertools/feature/custom_serial…
Browse files Browse the repository at this point in the history
…ization

implement custom (de)serialization for json and msgpack
  • Loading branch information
andre-merzky authored May 8, 2024
2 parents 676792f + 38b82cd commit 1bec269
Show file tree
Hide file tree
Showing 13 changed files with 378 additions and 101 deletions.
5 changes: 4 additions & 1 deletion src/radical/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,14 @@
from .profile import PROF_KEY_MAX

from .json_io import read_json, read_json_str, write_json
from .json_io import parse_json, parse_json_str
from .json_io import parse_json, parse_json_str, dumps_json
from .which import which
from .tracer import trace, untrace
from .get_version import get_version

from .serialize import to_json, from_json, to_msgpack, from_msgpack
from .serialize import register_serializable


# import various utility methods
from .ids import *
Expand Down
15 changes: 11 additions & 4 deletions src/radical/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,22 +176,23 @@ class Config(TypedDict):

# --------------------------------------------------------------------------
#
def __init__(self, module=None, category=None, name=None, cfg=None,
from_dict=None, path=None, expand=True, env=None,
_internal=False):
def __init__(self, from_dict=None,
module=None, category=None, name=None,
cfg=None, path=None, expand=True,
env=None, _internal=False):
"""
Load a config (json) file from the module's config tree, and overload
any user specific config settings if found.
Parameters
----------
from_dict: alias for cfg, to satisfy base class constructor
module: used to determine the module's config file location
- default: `radical.utils`
category: name of config to be loaded from module's config path
name: specify a specific configuration to be used
path: path to app config json to be used for initialization
cfg: application config dict to be used for initialization
from_dict: alias for cfg, to satisfy base class constructor
expand: enable / disable environment var expansion
- default: True
env: environment dictionary to be used for expansion
Expand All @@ -215,6 +216,12 @@ def __init__(self, module=None, category=None, name=None, cfg=None,
configuration hierarchy.
"""

# if `from_dict` is given but is a string, we interpret it as the
# `module` parameter.
if from_dict and isinstance(from_dict, str):
module = from_dict
from_dict = None

if from_dict:
# if we could only overload constructors by signature... :-/
# As it is, we have to emulate that...
Expand Down
30 changes: 18 additions & 12 deletions src/radical/utils/json_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@


import re
import json

from .misc import as_string, ru_open
from .serialize import to_json, from_json
from .misc import as_string, ru_open


# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -61,11 +61,22 @@ def write_json(data, fname):
fname = tmp

with ru_open(fname, 'w') as f:
json.dump(data, f, sort_keys=True, indent=4, ensure_ascii=False)
f.write(to_json(data))
f.write('\n')
f.flush()


# ------------------------------------------------------------------------------
#
def dumps_json(data):
    '''
    Serialize `data` into a json string.

    This only delegates to `to_json` and exists so that this module offers
    a string-producing counterpart with a consistent interface.
    '''

    json_str = to_json(data)
    return json_str


# ------------------------------------------------------------------------------
#
def parse_json(json_str, filter_comments=True):
Expand All @@ -77,16 +88,11 @@ def parse_json(json_str, filter_comments=True):
are stripped from json before parsing
'''

if not filter_comments:
return json.loads(json_str)

else:
content = ''
for line in json_str.split('\n'):
content += re.sub(r'^\s*#.*$', '', line)
content += '\n'
if filter_comments:
json_str = '\n'.join([re.sub(r'^\s*#.*$', '', line)
for line in json_str.split('\n')])

return json.loads(content)
return from_json(json_str)


# ------------------------------------------------------------------------------
Expand Down
173 changes: 173 additions & 0 deletions src/radical/utils/serialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@

import json

from typing import Callable

import msgpack

from .typeddict import as_dict, TypedDict

# ------------------------------------------------------------------------------
#
class _CType:

def __init__(self, ctype, encode, decode):

self.ctype : type = ctype
self.encode: callable = encode
self.decode: callable = decode


_ctypes = dict()


# ------------------------------------------------------------------------------
#
def register_serializable(cls, encode=None, decode=None):
    '''
    register a class for json and msgpack serialization / deserialization.

    The class is stored in the registry under its `__name__`.  If neither
    `encode` nor `decode` is given and the very same class is already
    registered, the existing registration is kept, so that repeated default
    registrations do not discard previously registered custom converters.

    Args:
        cls    (type)    : class type to register
        encode (callable): converts class instance into encodable data
                           structure (default: `cls`)
        decode (callable): recreates the class instance from that data
                           structure (default: `cls`)
    '''

    cname = cls.__name__

    if encode is None and decode is None:

        # a plain re-registration of a known class must not clobber custom
        # converters (and need not allocate a new record either)
        known = _ctypes.get(cname)
        if known is not None and known.ctype is cls:
            return

    if encode is None: encode = cls
    if decode is None: decode = cls

    _ctypes[cname] = _CType(cls, encode, decode)

register_serializable(TypedDict)


# ------------------------------------------------------------------------------
#
def _prep_typed_dict(d):
    '''
    Run `d` through `as_dict` with `_annotate` enabled and return the result.
    '''

    annotated = as_dict(d, _annotate=True)
    return annotated


# ------------------------------------------------------------------------------
#
class _json_encoder(json.JSONEncoder):
    '''
    internal methods to encode registered classes to json
    '''

    def encode(self, o, *args, **kw):
        '''
        Pass `o` through `as_dict(o, _annotate=True)` before handing the
        result to the stock encoder.
        '''

        tmp = as_dict(o, _annotate=True)
        return super().encode(tmp, *args, **kw)

    def default(self, o):
        '''
        Called by the json machinery for objects it cannot serialize itself.

        Returns a dict with the registry name under `_type` and the output
        of the registered `encode` callable under `as_str`.  Objects which
        match no registered class are delegated to the base class (which
        raises `TypeError`).
        '''

        otype   = type(o)
        cname   = otype.__name__
        methods = _ctypes.get(cname)

        # Prefer the registration of the exact type: a plain `isinstance`
        # scan would tag instances of a registered subclass with whichever
        # base class happens to come first in the registry.
        if methods is None or methods.ctype is not otype:

            for cname, methods in _ctypes.items():
                if isinstance(o, methods.ctype):
                    break
            else:
                return super().default(o)

        return {'_type' : cname,
                'as_str': methods.encode(o)}


# ------------------------------------------------------------------------------
#
def _json_decoder(obj):
    '''
    internal methods to decode registered classes from json

    Used as `object_hook`: `obj` is a decoded json object (dict).  If its
    `_type` entry names a registered class, that entry is removed and the
    registered `decode` callable is applied to the `as_str` payload if one is
    present, otherwise to the remaining dict.  Any other dict is returned
    unchanged.
    '''

    cname = obj.get('_type')

    # registry keys are class names, so only strings can match (this also
    # keeps unhashable `_type` values away from the dict lookup below)
    if not isinstance(cname, str):
        return obj

    # direct lookup instead of scanning the complete registry per object
    methods = _ctypes.get(cname)
    if methods is None:
        return obj

    del obj['_type']

    if 'as_str' in obj:
        return methods.decode(obj['as_str'])

    return methods.decode(obj)


# ------------------------------------------------------------------------------
#
def _msgpack_encoder(obj):
    '''
    internal methods to encode registered classes to msgpack

    Returns a dict with the marker key `__<name>__` set to `True` and the
    output of the registered `encode` callable under `as_str`.  Objects which
    match no registered class are returned unchanged.
    '''

    otype   = type(obj)
    cname   = otype.__name__
    methods = _ctypes.get(cname)

    # Prefer the registration of the exact type: a plain `isinstance` scan
    # would tag instances of a registered subclass with whichever base class
    # happens to come first in the registry.
    if methods is None or methods.ctype is not otype:

        for cname, methods in _ctypes.items():
            if isinstance(obj, methods.ctype):
                break
        else:
            return obj

    return {'__%s__' % cname: True, 'as_str': methods.encode(obj)}


# ------------------------------------------------------------------------------
#
def _msgpack_decoder(obj):
    '''
    internal methods to decode registered classes from msgpack

    Used as `object_hook`: if `obj` carries an `as_str` payload and a
    `__<name>__` marker key of a registered class, the registered `decode`
    callable is applied to the payload.  Any other dict is returned
    unchanged.
    '''

    # Without a payload there is nothing to decode.  Checking this first
    # avoids a `KeyError` on dicts which merely contain a marker-like key,
    # and skips the registry scan for ordinary dicts.
    if 'as_str' not in obj:
        return obj

    for cname, methods in _ctypes.items():
        if '__%s__' % cname in obj:
            return methods.decode(obj['as_str'])

    return obj


# ------------------------------------------------------------------------------
#
def to_json(data):
    '''
    Serialize `data` into a json string, using `_json_encoder` so that
    registered classes are handled.

    Args:
        data (object): data to be serialized

    Returns:
        str: json serialized data
    '''

    options = {'sort_keys'   : True,
               'indent'      : 4,
               'ensure_ascii': False,
               'cls'         : _json_encoder}

    return json.dumps(data, **options)


# ------------------------------------------------------------------------------
#
def from_json(data):
    '''
    Parse a json string into python data structures, using `_json_decoder`
    as object hook so that registered classes are handled.

    Args:
        data (str): json data to be deserialized

    Returns:
        object: deserialized data
    '''

    result = json.loads(data, object_hook=_json_decoder)
    return result


# ------------------------------------------------------------------------------
#
def to_msgpack(data):
    '''
    Serialize `data` with msgpack, using `_msgpack_encoder` as `default`
    handler so that registered classes are handled.

    Args:
        data (object): data to be serialized

    Returns:
        bytes: msgpack serialized data
    '''

    packed = msgpack.packb(data, use_bin_type=True, default=_msgpack_encoder)
    return packed


# ------------------------------------------------------------------------------
#
def from_msgpack(data):
    '''
    Unpack msgpack data into python data structures, using
    `_msgpack_decoder` as object hook so that registered classes are handled.

    Args:
        data (bytes): msgpack data to be deserialized

    Returns:
        object: deserialized data
    '''

    unpacked = msgpack.unpackb(data, raw=False, object_hook=_msgpack_decoder)
    return unpacked


# ------------------------------------------------------------------------------

47 changes: 30 additions & 17 deletions src/radical/utils/typeddict.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import copy
import sys

from .misc import as_list, as_tuple, is_string
from .misc import as_list, as_tuple, is_string


# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -98,7 +98,16 @@ def __new__(mcs, name, bases, namespace):
elif k not in namespace:
namespace[k] = v

return super().__new__(mcs, name, bases, namespace)
_new_cls = super().__new__(mcs, name, bases, namespace)

if _new_cls.__base__ is not dict:

# register sub-classes
from .serialize import register_serializable
register_serializable(_new_cls)

return _new_cls



# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -138,6 +147,10 @@ def __init__(self, from_dict=None, **kwargs):
`kwargs`).
'''

from .serialize import register_serializable

register_serializable(self.__class__)

self.update(copy.deepcopy(self._defaults))
self.update(from_dict)

Expand Down Expand Up @@ -288,15 +301,15 @@ def __getattr__(self, k):

def __setattr__(self, k, v):

# if k.startswith('_'):
# return object.__setattr__(self, k, v)
if k.startswith('__'):
return object.__setattr__(self, k, v)

self._data[k] = self._verify_setter(k, v)

def __delattr__(self, k):

# if k.startswith('_'):
# return object.__delattr__(self, k)
if k.startswith('__'):
return object.__delattr__(self, k)

del self._data[k]

Expand All @@ -312,8 +325,8 @@ def __repr__(self):

# --------------------------------------------------------------------------
#
def as_dict(self):
return as_dict(self._data)
def as_dict(self, _annotate=False):
return as_dict(self._data, _annotate)


# --------------------------------------------------------------------------
Expand Down Expand Up @@ -483,21 +496,21 @@ def _query(self, key, default=None, last_key=True):

# ------------------------------------------------------------------------------
#
def _as_dict_value(v):
return v.as_dict() if isinstance(v, TypedDict) else as_dict(v)


def as_dict(src):
def as_dict(src, _annotate=False):
'''
Iterate given object, apply `as_dict()` to all typed
values, and return the result (effectively a shallow copy).
'''
if isinstance(src, dict):
tgt = {k: _as_dict_value(v) for k, v in src.items()}
if isinstance(src, TypedDict):
tgt = {k: as_dict(v, _annotate) for k, v in src.items()}
if _annotate:
tgt['_type'] = type(src).__name__
elif isinstance(src, dict):
tgt = {k: as_dict(v, _annotate) for k, v in src.items()}
elif isinstance(src, list):
tgt = [_as_dict_value(x) for x in src]
tgt = [as_dict(x, _annotate) for x in src]
elif isinstance(src, tuple):
tgt = tuple([_as_dict_value(x) for x in src])
tgt = tuple([as_dict(x, _annotate) for x in src])
else:
tgt = src
return tgt
Expand Down
Loading

0 comments on commit 1bec269

Please sign in to comment.