From 95d182827b7e704a56dde6425e0144ee54b50497 Mon Sep 17 00:00:00 2001 From: Suren Khorenyan Date: Thu, 23 Apr 2020 11:54:17 +0300 Subject: [PATCH 1/6] Propagate unknown parameter to nested fields --- src/marshmallow/fields.py | 10 +++--- src/marshmallow/schema.py | 8 +++++ tests/test_fields.py | 64 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index a725c1327..eeef31165 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -571,16 +571,18 @@ def _test_collection(self, value): if many and not utils.is_collection(value): raise self.make_error("type", input=value, type=value.__class__.__name__) - def _load(self, value, data, partial=None): + def _load(self, value, data, partial=None, unknown=None): try: - valid_data = self.schema.load(value, unknown=self.unknown, partial=partial) + valid_data = self.schema.load( + value, unknown=unknown or self.unknown, partial=partial, + ) except ValidationError as error: raise ValidationError( error.messages, valid_data=error.valid_data ) from error return valid_data - def _deserialize(self, value, attr, data, partial=None, **kwargs): + def _deserialize(self, value, attr, data, partial=None, unknown=None, **kwargs): """Same as :meth:`Field._deserialize` with additional ``partial`` argument. :param bool|tuple partial: For nested schemas, the ``partial`` @@ -590,7 +592,7 @@ def _deserialize(self, value, attr, data, partial=None, **kwargs): Add ``partial`` parameter. """ self._test_collection(value) - return self._load(value, data, partial=partial) + return self._load(value, data, partial=partial, unknown=unknown) class Pluck(Nested): diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index 78b386717..cba08da9c 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -659,6 +659,13 @@ def _deserialize( d_kwargs["partial"] = sub_partial else: d_kwargs["partial"] = partial + + try: + if self.context["propagate_unknown_to_nested"]: + d_kwargs["unknown"] = unknown + except KeyError: + pass + getter = lambda val: field_obj.deserialize( val, field_name, data, **d_kwargs ) @@ -836,6 +843,7 @@ def _do_load( error_store = ErrorStore() errors = {} # type: typing.Dict[str, typing.List[str]] many = self.many if many is None else bool(many) + self.context["propagate_unknown_to_nested"] = unknown is not None unknown = unknown or self.unknown if partial is None: partial = self.partial diff --git a/tests/test_fields.py b/tests/test_fields.py index 19abeb6b6..8d6d38493 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -296,6 +296,70 @@ class MySchema(Schema): } +class TestNestedFieldPropagatesUnknown: + class SpamSchema(Schema): + meat = fields.String() + + class CanSchema(Schema): + spam = fields.Nested("SpamSchema") + + class ShelfSchema(Schema): + can = fields.Nested("CanSchema") + + @pytest.fixture + def data_nested_unknown(self): + return { + "spam": {"meat": "pork", "add-on": "eggs"}, + } + + @pytest.fixture + def multi_nested_data_with_unknown(self, data_nested_unknown): + return { + "can": data_nested_unknown, + "box": {"foo": "bar"}, + } + + @pytest.mark.parametrize("schema_kw", ({}, {"unknown": INCLUDE})) + def test_raises_when_unknown_passed_to_first_level_nested( + self, schema_kw, data_nested_unknown, + ): + with pytest.raises(ValidationError) as exc_info: + self.CanSchema(**schema_kw).load(data_nested_unknown) + assert exc_info.value.messages == {"spam": {"add-on": ["Unknown field."]}} + + @pytest.mark.parametrize( + "load_kw,expected_data", + ( + ({"unknown": INCLUDE}, {"spam": {"meat": "pork", "add-on": "eggs"}}), + ({"unknown": EXCLUDE}, {"spam": {"meat": "pork"}}), + ), + ) + def test_processes_when_unknown_stated_directly( + self, load_kw, data_nested_unknown, expected_data, + ): + data = self.CanSchema().load(data_nested_unknown, **load_kw) + assert data == expected_data + + @pytest.mark.parametrize( + "load_kw,expected_data", + ( + ( + {"unknown": INCLUDE}, + { + "can": {"spam": {"meat": "pork", "add-on": "eggs"}}, + "box": {"foo": "bar"}, + }, + ), + ({"unknown": EXCLUDE}, {"can": {"spam": {"meat": "pork"}}}), + ), + ) + def test_propagates_unknown_to_multi_nested_fields( + self, load_kw, expected_data, multi_nested_data_with_unknown, + ): + data = self.ShelfSchema().load(multi_nested_data_with_unknown, **load_kw) + assert data == expected_data + + class TestListNested: @pytest.mark.parametrize("param", ("only", "exclude", "dump_only", "load_only")) def test_list_nested_only_exclude_dump_only_load_only_propagated_to_nested( From 9b9723559c5f1ad054f207c3259ed3b40c0ec2e0 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Thu, 16 Jul 2020 23:08:54 +0000 Subject: [PATCH 2/6] Make "propagate_unknown" behavior explicit This adds a new option, `propagate_unknown` which defaults to False. It controls the behavior of `unknown` with respect to nested structures. Anywhere that `unknown` can be set, `propagate_unknown` can be set. That means it can be applied to a schema instance, a load call, schema.Meta, or to fields.Nested . When set, nested deserialize calls will get the same value for `unknown` which their parent call got and they will receive `propagate_unknown=True`. The new flag is completely opt-in and therefore backwards compatible with any current usages of marshmallow. Once you opt in to this behavior on a schema, you don't need to worry about making sure it's set by nested schemas that you use. In the name of simplicity, this sacrifices a bit of flexibility. A schema with `propagate_unknown=True, unknown=...` will override the `unknown` settings on any of its child schemas. Tests cover usage as a schema instantiation arg and as a load arg for some simple data structures. --- src/marshmallow/base.py | 19 ++++++++- src/marshmallow/fields.py | 30 +++++++++++-- src/marshmallow/schema.py | 40 ++++++++++++----- tests/test_fields.py | 90 ++++++++++++++++++++++----------------- 4 files changed, 124 insertions(+), 55 deletions(-) diff --git a/src/marshmallow/base.py b/src/marshmallow/base.py index 6e739b350..05db11144 100644 --- a/src/marshmallow/base.py +++ b/src/marshmallow/base.py @@ -38,10 +38,25 @@ def dump(self, obj, *, many: bool = None): def dumps(self, obj, *, many: bool = None): raise NotImplementedError - def load(self, data, *, many: bool = None, partial=None, unknown=None): + def load( + self, + data, + *, + many: bool = None, + partial=None, + unknown=None, + propagate_unknown=None + ): raise NotImplementedError def loads( - self, json_data, *, many: bool = None, partial=None, unknown=None, **kwargs + self, + json_data, + *, + many: bool = None, + partial=None, + unknown=None, + propagate_unknown=None, + **kwargs ): raise NotImplementedError diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index eeef31165..ad292143e 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -474,6 +474,7 @@ def __init__( exclude: types.StrSequenceOrSet = (), many: bool = False, unknown: str = None, + propagate_unknown: bool = None, **kwargs ): # Raise error if only or exclude is passed as string, not list of strings @@ -494,6 +495,7 @@ def __init__( self.exclude = exclude self.many = many self.unknown = unknown + self.propagate_unknown = propagate_unknown self._schema = None # Cached Schema instance super().__init__(default=default, **kwargs) @@ -571,10 +573,15 @@ def _test_collection(self, value): if many and not utils.is_collection(value): raise self.make_error("type", input=value, type=value.__class__.__name__) - def _load(self, value, data, partial=None, unknown=None): + def _load(self, value, data, partial=None, unknown=None, propagate_unknown=None): try: valid_data = self.schema.load( - value, unknown=unknown or self.unknown, partial=partial, + value, + unknown=unknown or self.unknown, + propagate_unknown=propagate_unknown + if propagate_unknown is not None + else self.propagate_unknown, + partial=partial, ) except ValidationError as error: raise ValidationError( @@ -582,7 +589,16 @@ def _load(self, value, data, partial=None, unknown=None): ) from error return valid_data - def _deserialize(self, value, attr, data, partial=None, unknown=None, **kwargs): + def _deserialize( + self, + value, + attr, + data, + partial=None, + unknown=None, + propagate_unknown=None, + **kwargs + ): """Same as :meth:`Field._deserialize` with additional ``partial`` argument. :param bool|tuple partial: For nested schemas, the ``partial`` @@ -592,7 +608,13 @@ def _deserialize(self, value, attr, data, partial=None, unknown=None, **kwargs): Add ``partial`` parameter. """ self._test_collection(value) - return self._load(value, data, partial=partial, unknown=unknown) + return self._load( + value, + data, + partial=partial, + unknown=unknown, + propagate_unknown=propagate_unknown, + ) class Pluck(Nested): diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index cba08da9c..c52aadc9a 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -228,6 +228,7 @@ def __init__(self, meta, ordered: bool = False): self.load_only = getattr(meta, "load_only", ()) self.dump_only = getattr(meta, "dump_only", ()) self.unknown = getattr(meta, "unknown", RAISE) + self.propagate_unknown = getattr(meta, "propagate_unknown", False) self.register = getattr(meta, "register", True) @@ -372,7 +373,8 @@ def __init__( load_only: types.StrSequenceOrSet = (), dump_only: types.StrSequenceOrSet = (), partial: typing.Union[bool, types.StrSequenceOrSet] = False, - unknown: str = None + unknown: str = None, + propagate_unknown: bool = None ): # Raise error if only or exclude is passed as string, not list of strings if only is not None and not is_collection(only): @@ -389,6 +391,7 @@ def __init__( self.dump_only = set(dump_only) or set(self.opts.dump_only) self.partial = partial self.unknown = unknown or self.opts.unknown + self.propagate_unknown = propagate_unknown or self.opts.propagate_unknown self.context = context or {} self._normalize_nested_options() #: Dictionary mapping field_names -> :class:`Field` objects @@ -592,6 +595,7 @@ def _deserialize( many: bool = False, partial=False, unknown=RAISE, + propagate_unknown=False, index=None ) -> typing.Union[_T, typing.List[_T]]: """Deserialize ``data``. @@ -625,6 +629,7 @@ def _deserialize( many=False, partial=partial, unknown=unknown, + propagate_unknown=propagate_unknown, index=idx, ), ) @@ -648,7 +653,7 @@ def _deserialize( partial_is_collection and attr_name in partial ): continue - d_kwargs = {} + d_kwargs = {} # type: typing.Dict[str, typing.Any] # Allow partial loading of nested schemas. if partial_is_collection: prefix = field_name + "." @@ -660,11 +665,9 @@ def _deserialize( else: d_kwargs["partial"] = partial - try: - if self.context["propagate_unknown_to_nested"]: - d_kwargs["unknown"] = unknown - except KeyError: - pass + if propagate_unknown: + d_kwargs["unknown"] = unknown + d_kwargs["propagate_unknown"] = True getter = lambda val: field_obj.deserialize( val, field_name, data, **d_kwargs @@ -705,7 +708,8 @@ def load( *, many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, - unknown: str = None + unknown: str = None, + propagate_unknown: bool = None ): """Deserialize a data structure to an object defined by this Schema's fields. @@ -728,7 +732,12 @@ def load( if invalid data are passed. """ return self._do_load( - data, many=many, partial=partial, unknown=unknown, postprocess=True + data, + many=many, + partial=partial, + unknown=unknown, + propagate_unknown=propagate_unknown, + postprocess=True, ) def loads( @@ -738,6 +747,7 @@ def loads( many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, unknown: str = None, + propagate_unknown: bool = None, **kwargs ): """Same as :meth:`load`, except it takes a JSON string as input. @@ -761,7 +771,13 @@ def loads( if invalid data are passed. """ data = self.opts.render_module.loads(json_data, **kwargs) - return self.load(data, many=many, partial=partial, unknown=unknown) + return self.load( + data, + many=many, + partial=partial, + unknown=unknown, + propagate_unknown=propagate_unknown, + ) def _run_validator( self, @@ -822,6 +838,7 @@ def _do_load( many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, unknown: str = None, + propagate_unknown: bool = None, postprocess: bool = True ): """Deserialize `data`, returning the deserialized result. @@ -843,8 +860,8 @@ def _do_load( error_store = ErrorStore() errors = {} # type: typing.Dict[str, typing.List[str]] many = self.many if many is None else bool(many) - self.context["propagate_unknown_to_nested"] = unknown is not None unknown = unknown or self.unknown + propagate_unknown = propagate_unknown or self.propagate_unknown if partial is None: partial = self.partial # Run preprocessors @@ -868,6 +885,7 @@ def _do_load( many=many, partial=partial, unknown=unknown, + propagate_unknown=propagate_unknown, ) # Run field-level validation self._invoke_field_validators( diff --git a/tests/test_fields.py b/tests/test_fields.py index 8d6d38493..c155353fc 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -308,56 +308,70 @@ class ShelfSchema(Schema): @pytest.fixture def data_nested_unknown(self): - return { - "spam": {"meat": "pork", "add-on": "eggs"}, - } + return {"spam": {"meat": "pork", "add-on": "eggs"}} @pytest.fixture def multi_nested_data_with_unknown(self, data_nested_unknown): - return { - "can": data_nested_unknown, + return {"can": data_nested_unknown, "box": {"foo": "bar"}} + + @pytest.mark.parametrize( + "schema_kwargs,load_kwargs", + [ + ({}, {"propagate_unknown": True, "unknown": INCLUDE}), + ({"propagate_unknown": True}, {"unknown": INCLUDE}), + ({"propagate_unknown": True, "unknown": INCLUDE}, {}), + ({"unknown": INCLUDE}, {"propagate_unknown": True}), + ], + ) + def test_propagate_unknown_include( + self, + schema_kwargs, + load_kwargs, + data_nested_unknown, + multi_nested_data_with_unknown, + ): + data = self.ShelfSchema(**schema_kwargs).load( + multi_nested_data_with_unknown, **load_kwargs + ) + assert data == { + "can": {"spam": {"meat": "pork", "add-on": "eggs"}}, "box": {"foo": "bar"}, } - @pytest.mark.parametrize("schema_kw", ({}, {"unknown": INCLUDE})) - def test_raises_when_unknown_passed_to_first_level_nested( - self, schema_kw, data_nested_unknown, - ): - with pytest.raises(ValidationError) as exc_info: - self.CanSchema(**schema_kw).load(data_nested_unknown) - assert exc_info.value.messages == {"spam": {"add-on": ["Unknown field."]}} + data = self.CanSchema(**schema_kwargs).load(data_nested_unknown, **load_kwargs) + assert data == {"spam": {"meat": "pork", "add-on": "eggs"}} @pytest.mark.parametrize( - "load_kw,expected_data", - ( - ({"unknown": INCLUDE}, {"spam": {"meat": "pork", "add-on": "eggs"}}), - ({"unknown": EXCLUDE}, {"spam": {"meat": "pork"}}), - ), + "schema_kwargs,load_kwargs", + [ + ({}, {"propagate_unknown": True, "unknown": EXCLUDE}), + ({"propagate_unknown": True}, {"unknown": EXCLUDE}), + ({"propagate_unknown": True, "unknown": EXCLUDE}, {}), + ({"unknown": EXCLUDE}, {"propagate_unknown": True}), + ], ) - def test_processes_when_unknown_stated_directly( - self, load_kw, data_nested_unknown, expected_data, + def test_propagate_unknown_exclude( + self, + schema_kwargs, + load_kwargs, + data_nested_unknown, + multi_nested_data_with_unknown, ): - data = self.CanSchema().load(data_nested_unknown, **load_kw) - assert data == expected_data + data = self.ShelfSchema(**schema_kwargs).load( + multi_nested_data_with_unknown, **load_kwargs + ) + assert data == {"can": {"spam": {"meat": "pork"}}} - @pytest.mark.parametrize( - "load_kw,expected_data", - ( - ( - {"unknown": INCLUDE}, - { - "can": {"spam": {"meat": "pork", "add-on": "eggs"}}, - "box": {"foo": "bar"}, - }, - ), - ({"unknown": EXCLUDE}, {"can": {"spam": {"meat": "pork"}}}), - ), - ) - def test_propagates_unknown_to_multi_nested_fields( - self, load_kw, expected_data, multi_nested_data_with_unknown, + data = self.CanSchema(**schema_kwargs).load(data_nested_unknown, **load_kwargs) + assert data == {"spam": {"meat": "pork"}} + + @pytest.mark.parametrize("schema_kw", ({}, {"unknown": INCLUDE})) + def test_raises_when_unknown_passed_to_first_level_nested( + self, schema_kw, data_nested_unknown ): - data = self.ShelfSchema().load(multi_nested_data_with_unknown, **load_kw) - assert data == expected_data + with pytest.raises(ValidationError) as exc_info: + self.CanSchema(**schema_kw).load(data_nested_unknown) + assert exc_info.value.messages == {"spam": {"add-on": ["Unknown field."]}} class TestListNested: From 9e1f75f10ae53e0991f92b2343956f2550f6523d Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 17 Jul 2020 00:28:10 +0000 Subject: [PATCH 3/6] Make propagate_unknown respect any explicit value propagate_unknown will still traverse any series of nested documents, meaning that once you set propagate_unknown=True, it is true for the whole schema structure. However, this introduces tracking for whether or not `unknown` was set explicitly. If `unknown=RAISE` is set because no value was specified, we will set a new flag on the schema, `auto_unknown=True`. propagate_unknown now has the following behavior: - if the nested schema has auto_unknown=False, use the current value for `unknown` in the nested `load` call - if a nested field has its `unknown` attribute set, use that in place of any value sent via `propagate_unknown` Effectively, this means that if you set `unknown` explicitly anywhere in a nested schema structure, it will propagate downwards from that point. Combined with the fact that propagate_unknown=True propagates downwards across all schema barriers, including if `propagate_unknown=False` is set explicitly somewhere, this could be confusing. However, because the idea is for `propagate_unknown=True` to eventually be the only supported behavior for marshmallow, this is acceptable as a limitation. auto_unknown is an attribute of schema opts and of schema instances, with the same kind of inheritance behavior as other fields. --- src/marshmallow/fields.py | 9 ++++++ src/marshmallow/schema.py | 12 +++++++- tests/test_schema.py | 63 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index ad292143e..eb1b8a4af 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -608,6 +608,15 @@ def _deserialize( Add ``partial`` parameter. """ self._test_collection(value) + # check if self.unknown or self.schema.unknown is set + # however, we should only respect `self.schema.unknown` if + # `auto_unknown` is False, meaning that it was set explicitly on the + # schema class or instance + explicit_unknown = self.unknown or ( + self.schema.unknown if not self.schema.auto_unknown else None + ) + if explicit_unknown: + unknown = explicit_unknown return self._load( value, data, diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index c52aadc9a..690ded829 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -227,7 +227,14 @@ def __init__(self, meta, ordered: bool = False): self.include = getattr(meta, "include", {}) self.load_only = getattr(meta, "load_only", ()) self.dump_only = getattr(meta, "dump_only", ()) - self.unknown = getattr(meta, "unknown", RAISE) + # self.unknown defaults to "RAISE", but note whether it was explicit or + # not, so that when we're handling propagate_unknown we can decide + # whether or not to propagate based on whether or not it was set + # explicitly + self.unknown = getattr(meta, "unknown", None) + self.auto_unknown = self.unknown is None + if self.auto_unknown: + self.unknown = RAISE self.propagate_unknown = getattr(meta, "propagate_unknown", False) self.register = getattr(meta, "register", True) @@ -391,6 +398,9 @@ def __init__( self.dump_only = set(dump_only) or set(self.opts.dump_only) self.partial = partial self.unknown = unknown or self.opts.unknown + # if unknown was not set explicitly AND self.opts.auto_unknown is true, + # then the value should be considered "automatic" + self.auto_unknown = (not unknown) and self.opts.auto_unknown self.propagate_unknown = propagate_unknown or self.opts.propagate_unknown self.context = context or {} self._normalize_nested_options() diff --git a/tests/test_schema.py b/tests/test_schema.py index 85646754b..2bdb00ed3 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -2901,3 +2901,66 @@ class DefinitelyUniqueSchema(Schema): SchemaClass = class_registry.get_class(DefinitelyUniqueSchema.__name__) assert SchemaClass is DefinitelyUniqueSchema + + +def test_propagate_unknown_stops_at_explicit_value_for_nested(): + # propagate_unknown=True should traverse any "auto_unknown" values and + # replace them with the "unknown" value from the parent context (schema or + # load arguments) + # this test makes sure that it stops when a nested field or schema has + # "unknown" set explicitly (so auto_unknown=False) + + class Bottom(Schema): + x = fields.Str() + + class Middle(Schema): + x = fields.Str() + # set unknown explicitly on a nested field, so auto_unknown will be + # false going into Bottom + child = fields.Nested(Bottom, unknown=EXCLUDE) + + class Top(Schema): + x = fields.Str() + child = fields.Nested(Middle) + + data = { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, + } + result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + assert result == { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi"}}, + } + + +def test_propagate_unknown_stops_at_explicit_value_for_meta(): + # this is the same as the above test of propagate_unknown stopping where + # auto_unknown=False, but it checks that this applies when `unknown` is set + # by means of `Meta` + + class Bottom(Schema): + x = fields.Str() + + class Middle(Schema): + x = fields.Str() + child = fields.Nested(Bottom) + + # set unknown explicitly on a nested field, so auto_unknown will be + # false going into Bottom + class Meta: + unknown = EXCLUDE + + class Top(Schema): + x = fields.Str() + child = fields.Nested(Middle) + + data = { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, + } + result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + assert result == {"x": "hi", "y": "bye", "child": {"x": "hi", "child": {"x": "hi"}}} From 3e711d797d49cbd5eb9ece0f6e7bec775df5eb61 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 17 Jul 2020 02:55:43 +0000 Subject: [PATCH 4/6] Add changelog and docs for propagate_unknown The changelog entry, including credit to prior related work, covers the closed issues and describes how `propagate_unknown` is expected to behave. Inline documentation attempts to strike a balance between clarify and brevity. closes #1428, #1429, #1490, #1575 --- CHANGELOG.rst | 28 ++++++++++++++++++++++++++++ src/marshmallow/fields.py | 4 ++++ src/marshmallow/schema.py | 19 +++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2bf713345..b7b783d30 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,34 @@ Changelog --------- +3.8.0 (Unreleased) +****************** + +Features: + +- The behavior of the ``unknown`` option can be further customized with a + second option, ``propagate_unknown``. When set to ``True``, any nested + schemas will receive the same ``unknown`` value as their parent if they did + not set an ``unknown`` value explicitly. + ``propagate_unknown`` itself propagates across any number of layers of + nesting and cannot be disabled by a child schema if it enabled in its parent. + (:issue:`1490`, :issue:`1428`) + Thanks :user:`lmignon` and :user:`mahenzon`. + +.. note:: + + In order to retain flexibility, when a schema is being loaded with + ``propagate_unknown=True``, you can still set ``unknown`` explicitly on child + schemas. However, be aware that such a value will be propagated to any of its + descendants. + + For example, a schema which specifies ``unknown=EXCLUDE`` will set + ``EXCLUDE`` in all of its children. If one of the children specifies + ``unknown=IGNORE``, then ``IGNORE`` will be passed to the relevant + grandchildren. + Other children of the original schema could specify ``unknown=RAISE``, and + this would apply to them equally well. + 3.7.1 (2020-07-20) ****************** diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index eb1b8a4af..d42fb9c15 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -459,6 +459,10 @@ class ParentSchema(Schema): :param many: Whether the field is a collection of objects. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to any ``Nested`` fields within the nested schema. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value. Note that this + only has an effect when there are multiple layers of nesting :param kwargs: The same keyword arguments that :class:`Field` receives. """ diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index 690ded829..a0859b47a 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -287,6 +287,9 @@ class AlbumSchema(Schema): will be ignored. Use dot delimiters to specify nested fields. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value .. versionchanged:: 3.0.0 `prefix` parameter removed. @@ -364,6 +367,10 @@ class Meta: - ``dump_only``: Tuple or list of fields to exclude from deserialization - ``unknown``: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. + - ``propagate_unknown`` If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down, but if a ``Nested`` + field sets ``unknown``, it will begin to propagate that value, not the + where this is set. - ``register``: Whether to register the `Schema` with marshmallow's internal class registry. Must be `True` if you intend to refer to this `Schema` by class name in `Nested` fields. Only set this to `False` when memory @@ -619,6 +626,9 @@ def _deserialize( will be ignored. Use dot delimiters to specify nested fields. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value :param int index: Index of the item being serialized (for storing errors) if serializing a collection, otherwise `None`. :return: A dictionary of the deserialized data. @@ -733,6 +743,9 @@ def load( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value :return: Deserialized data .. versionadded:: 1.0.0 @@ -772,6 +785,9 @@ def loads( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value :return: Deserialized data .. versionadded:: 1.0.0 @@ -864,6 +880,9 @@ def _do_load( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. + :param propagate_unknown: If ``True``, the value for ``unknown`` will be + applied to all ``Nested`` fields. Propagates down and allows + ``Nested`` fields to apply their own ``unknown`` value :param postprocess: Whether to run post_load methods.. :return: Deserialized data """ From e495fbeaa0c898c09cce03fb11bc880473396b49 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 4 Sep 2020 19:18:08 +0000 Subject: [PATCH 5/6] Replace `propagate_unknown` with or-able `unknown` Rather than having a new parameter which allows `propagate_unknown` behavior, convert this into an option which can be set on the ``unknown`` parameter itself. Drawing from other interfaces which present flags as or-able values (traditionally bitmasks), express the setting of this option as `x | PROPAGATE` for any supported `x`. This makes it harder to propagate the value of `propagate` separately from the rest of the value of `unknown`. For simplicity, change the behavior here to suit this implementation. --- CHANGELOG.rst | 28 +++++++-------- src/marshmallow/__init__.py | 3 +- src/marshmallow/base.py | 17 ++------- src/marshmallow/fields.py | 45 ++++++----------------- src/marshmallow/schema.py | 72 +++++++++++-------------------------- src/marshmallow/utils.py | 57 ++++++++++++++++++++++++----- tests/test_fields.py | 19 +++++----- tests/test_schema.py | 21 +++++++---- 8 files changed, 122 insertions(+), 140 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b7b783d30..165ea3a13 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,28 +6,26 @@ Changelog Features: -- The behavior of the ``unknown`` option can be further customized with a - second option, ``propagate_unknown``. When set to ``True``, any nested - schemas will receive the same ``unknown`` value as their parent if they did - not set an ``unknown`` value explicitly. - ``propagate_unknown`` itself propagates across any number of layers of - nesting and cannot be disabled by a child schema if it enabled in its parent. +- The behavior of the ``unknown`` option can be further customized with a new + value, ``PROPAGATE``. If ``unknown=EXCLUDE | PROPAGATE`` is set, then the + value of ``unknown=EXCLUDE | PROPAGATE`` will be passed to any nested + schemas which do not explicitly set ``unknown`` in ``Nested`` or schema + options. This works for ``INCLUDE | PROPAGATE`` and ``RAISE | PROPAGATE`` as + well. (:issue:`1490`, :issue:`1428`) Thanks :user:`lmignon` and :user:`mahenzon`. .. note:: - In order to retain flexibility, when a schema is being loaded with - ``propagate_unknown=True``, you can still set ``unknown`` explicitly on child - schemas. However, be aware that such a value will be propagated to any of its - descendants. + When a schema is being loaded with ``unknown=... | PROPAGATE``, you can still + set ``unknown`` explicitly on child schemas. However, be aware that such a + value may turn off propagation at that point in the schema hierarchy. For example, a schema which specifies ``unknown=EXCLUDE`` will set - ``EXCLUDE`` in all of its children. If one of the children specifies - ``unknown=IGNORE``, then ``IGNORE`` will be passed to the relevant - grandchildren. - Other children of the original schema could specify ``unknown=RAISE``, and - this would apply to them equally well. + ``EXCLUDE`` for itself. But because the value is ``EXCLUDE`` rather than + ``EXCLUDE | PROPAGATE``, that setting will not be propagated to its + children, even if there is a parent schema which sets + ``unknown=EXCLUDE | PROPAGATE``. 3.7.1 (2020-07-20) ****************** diff --git a/src/marshmallow/__init__.py b/src/marshmallow/__init__.py index 7b0b21b4d..c4617e6ef 100644 --- a/src/marshmallow/__init__.py +++ b/src/marshmallow/__init__.py @@ -9,7 +9,7 @@ validates, validates_schema, ) -from marshmallow.utils import EXCLUDE, INCLUDE, RAISE, pprint, missing +from marshmallow.utils import EXCLUDE, INCLUDE, RAISE, PROPAGATE, pprint, missing from marshmallow.exceptions import ValidationError from distutils.version import LooseVersion @@ -19,6 +19,7 @@ "EXCLUDE", "INCLUDE", "RAISE", + "PROPAGATE", "Schema", "SchemaOpts", "fields", diff --git a/src/marshmallow/base.py b/src/marshmallow/base.py index 05db11144..e3cbde749 100644 --- a/src/marshmallow/base.py +++ b/src/marshmallow/base.py @@ -39,24 +39,11 @@ def dumps(self, obj, *, many: bool = None): raise NotImplementedError def load( - self, - data, - *, - many: bool = None, - partial=None, - unknown=None, - propagate_unknown=None + self, data, *, many: bool = None, partial=None, unknown=None ): raise NotImplementedError def loads( - self, - json_data, - *, - many: bool = None, - partial=None, - unknown=None, - propagate_unknown=None, - **kwargs + self, json_data, *, many: bool = None, partial=None, unknown=None, **kwargs ): raise NotImplementedError diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index d42fb9c15..b62d9d090 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -18,6 +18,7 @@ missing as missing_, resolve_field_instance, is_aware, + UnknownParam, ) from marshmallow.exceptions import ( ValidationError, @@ -459,10 +460,6 @@ class ParentSchema(Schema): :param many: Whether the field is a collection of objects. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to any ``Nested`` fields within the nested schema. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value. Note that this - only has an effect when there are multiple layers of nesting :param kwargs: The same keyword arguments that :class:`Field` receives. """ @@ -477,8 +474,7 @@ def __init__( only: types.StrSequenceOrSet = None, exclude: types.StrSequenceOrSet = (), many: bool = False, - unknown: str = None, - propagate_unknown: bool = None, + unknown: typing.Union[str, UnknownParam] = None, **kwargs ): # Raise error if only or exclude is passed as string, not list of strings @@ -498,8 +494,7 @@ def __init__( self.only = only self.exclude = exclude self.many = many - self.unknown = unknown - self.propagate_unknown = propagate_unknown + self.unknown = UnknownParam.parse_if_str(unknown) if unknown else None self._schema = None # Cached Schema instance super().__init__(default=default, **kwargs) @@ -577,15 +572,10 @@ def _test_collection(self, value): if many and not utils.is_collection(value): raise self.make_error("type", input=value, type=value.__class__.__name__) - def _load(self, value, data, partial=None, unknown=None, propagate_unknown=None): + def _load(self, value, data, partial=None, unknown=None): try: valid_data = self.schema.load( - value, - unknown=unknown or self.unknown, - propagate_unknown=propagate_unknown - if propagate_unknown is not None - else self.propagate_unknown, - partial=partial, + value, unknown=unknown if unknown else self.unknown, partial=partial, ) except ValidationError as error: raise ValidationError( @@ -593,16 +583,7 @@ def _load(self, value, data, partial=None, unknown=None, propagate_unknown=None) ) from error return valid_data - def _deserialize( - self, - value, - attr, - data, - partial=None, - unknown=None, - propagate_unknown=None, - **kwargs - ): + def _deserialize(self, value, attr, data, partial=None, unknown=None, **kwargs): """Same as :meth:`Field._deserialize` with additional ``partial`` argument. :param bool|tuple partial: For nested schemas, the ``partial`` @@ -616,18 +597,14 @@ def _deserialize( # however, we should only respect `self.schema.unknown` if # `auto_unknown` is False, meaning that it was set explicitly on the # schema class or instance - explicit_unknown = self.unknown or ( - self.schema.unknown if not self.schema.auto_unknown else None + explicit_unknown = ( + self.unknown + if self.unknown + else (self.schema.unknown if not self.schema.auto_unknown else None) ) if explicit_unknown: unknown = explicit_unknown - return self._load( - value, - data, - partial=partial, - unknown=unknown, - propagate_unknown=propagate_unknown, - ) + return self._load(value, data, partial=partial, unknown=unknown,) class Pluck(Nested): diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index a0859b47a..4770fd533 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -27,6 +27,7 @@ RAISE, EXCLUDE, INCLUDE, + UnknownParam, missing, set_value, get_value, @@ -228,14 +229,14 @@ def __init__(self, meta, ordered: bool = False): self.load_only = getattr(meta, "load_only", ()) self.dump_only = getattr(meta, "dump_only", ()) # self.unknown defaults to "RAISE", but note whether it was explicit or - # not, so that when we're handling propagate_unknown we can decide + # not, so that when we're handling "propagate" we can decide # whether or not to propagate based on whether or not it was set # explicitly self.unknown = getattr(meta, "unknown", None) self.auto_unknown = self.unknown is None if self.auto_unknown: self.unknown = RAISE - self.propagate_unknown = getattr(meta, "propagate_unknown", False) + self.unknown = UnknownParam.parse_if_str(self.unknown) self.register = getattr(meta, "register", True) @@ -287,9 +288,6 @@ class AlbumSchema(Schema): will be ignored. Use dot delimiters to specify nested fields. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value .. versionchanged:: 3.0.0 `prefix` parameter removed. @@ -367,10 +365,6 @@ class Meta: - ``dump_only``: Tuple or list of fields to exclude from deserialization - ``unknown``: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. - - ``propagate_unknown`` If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down, but if a ``Nested`` - field sets ``unknown``, it will begin to propagate that value, not the - where this is set. - ``register``: Whether to register the `Schema` with marshmallow's internal class registry. Must be `True` if you intend to refer to this `Schema` by class name in `Nested` fields. Only set this to `False` when memory @@ -387,8 +381,7 @@ def __init__( load_only: types.StrSequenceOrSet = (), dump_only: types.StrSequenceOrSet = (), partial: typing.Union[bool, types.StrSequenceOrSet] = False, - unknown: str = None, - propagate_unknown: bool = None + unknown: str = None ): # Raise error if only or exclude is passed as string, not list of strings if only is not None and not is_collection(only): @@ -404,11 +397,14 @@ def __init__( self.load_only = set(load_only) or set(self.opts.load_only) self.dump_only = set(dump_only) or set(self.opts.dump_only) self.partial = partial - self.unknown = unknown or self.opts.unknown + self.unknown = ( + UnknownParam.parse_if_str(unknown) + if unknown is not None + else self.opts.unknown + ) # if unknown was not set explicitly AND self.opts.auto_unknown is true, # then the value should be considered "automatic" self.auto_unknown = (not unknown) and self.opts.auto_unknown - self.propagate_unknown = propagate_unknown or self.opts.propagate_unknown self.context = context or {} self._normalize_nested_options() #: Dictionary mapping field_names -> :class:`Field` objects @@ -612,7 +608,6 @@ def _deserialize( many: bool = False, partial=False, unknown=RAISE, - propagate_unknown=False, index=None ) -> typing.Union[_T, typing.List[_T]]: """Deserialize ``data``. @@ -626,13 +621,11 @@ def _deserialize( will be ignored. Use dot delimiters to specify nested fields. :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value :param int index: Index of the item being serialized (for storing errors) if serializing a collection, otherwise `None`. :return: A dictionary of the deserialized data. """ + unknown = UnknownParam.parse_if_str(unknown) index_errors = self.opts.index_errors index = index if index_errors else None if many: @@ -649,7 +642,6 @@ def _deserialize( many=False, partial=partial, unknown=unknown, - propagate_unknown=propagate_unknown, index=idx, ), ) @@ -685,9 +677,8 @@ def _deserialize( else: d_kwargs["partial"] = partial - if propagate_unknown: + if unknown.propagate: d_kwargs["unknown"] = unknown - d_kwargs["propagate_unknown"] = True getter = lambda val: field_obj.deserialize( val, field_name, data, **d_kwargs @@ -702,16 +693,16 @@ def _deserialize( if value is not missing: key = field_obj.attribute or attr_name set_value(typing.cast(typing.Dict, ret), key, value) - if unknown != EXCLUDE: + if unknown.value != EXCLUDE.value: fields = { field_obj.data_key if field_obj.data_key is not None else field_name for field_name, field_obj in self.load_fields.items() } for key in set(data) - fields: value = data[key] - if unknown == INCLUDE: + if unknown.value == INCLUDE.value: set_value(typing.cast(typing.Dict, ret), key, value) - elif unknown == RAISE: + elif unknown.value == RAISE.value: error_store.store_error( [self.error_messages["unknown"]], key, @@ -728,8 +719,7 @@ def load( *, many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, - unknown: str = None, - propagate_unknown: bool = None + unknown: str = None ): """Deserialize a data structure to an object defined by this Schema's fields. @@ -743,9 +733,6 @@ def load( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value :return: Deserialized data .. versionadded:: 1.0.0 @@ -755,12 +742,7 @@ def load( if invalid data are passed. """ return self._do_load( - data, - many=many, - partial=partial, - unknown=unknown, - propagate_unknown=propagate_unknown, - postprocess=True, + data, many=many, partial=partial, unknown=unknown, postprocess=True, ) def loads( @@ -770,7 +752,6 @@ def loads( many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, unknown: str = None, - propagate_unknown: bool = None, **kwargs ): """Same as :meth:`load`, except it takes a JSON string as input. @@ -785,9 +766,6 @@ def loads( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value :return: Deserialized data .. versionadded:: 1.0.0 @@ -797,13 +775,7 @@ def loads( if invalid data are passed. """ data = self.opts.render_module.loads(json_data, **kwargs) - return self.load( - data, - many=many, - partial=partial, - unknown=unknown, - propagate_unknown=propagate_unknown, - ) + return self.load(data, many=many, partial=partial, unknown=unknown,) def _run_validator( self, @@ -864,7 +836,6 @@ def _do_load( many: bool = None, partial: typing.Union[bool, types.StrSequenceOrSet] = None, unknown: str = None, - propagate_unknown: bool = None, postprocess: bool = True ): """Deserialize `data`, returning the deserialized result. @@ -880,17 +851,15 @@ def _do_load( :param unknown: Whether to exclude, include, or raise an error for unknown fields in the data. Use `EXCLUDE`, `INCLUDE` or `RAISE`. If `None`, the value for `self.unknown` is used. - :param propagate_unknown: If ``True``, the value for ``unknown`` will be - applied to all ``Nested`` fields. Propagates down and allows - ``Nested`` fields to apply their own ``unknown`` value :param postprocess: Whether to run post_load methods.. :return: Deserialized data """ error_store = ErrorStore() errors = {} # type: typing.Dict[str, typing.List[str]] many = self.many if many is None else bool(many) - unknown = unknown or self.unknown - propagate_unknown = propagate_unknown or self.propagate_unknown + unknown = UnknownParam.parse_if_str( + unknown if unknown is not None else self.unknown + ) if partial is None: partial = self.partial # Run preprocessors @@ -914,7 +883,6 @@ def _do_load( many=many, partial=partial, unknown=unknown, - propagate_unknown=propagate_unknown, ) # Run field-level validation self._invoke_field_validators( diff --git a/src/marshmallow/utils.py b/src/marshmallow/utils.py index 61ed36404..3eab1f46e 100644 --- a/src/marshmallow/utils.py +++ b/src/marshmallow/utils.py @@ -1,7 +1,7 @@ """Utility methods for marshmallow.""" import collections -import functools import datetime as dt +import functools import inspect import json import re @@ -15,9 +15,52 @@ from marshmallow.exceptions import FieldInstanceResolutionError from marshmallow.warnings import RemovedInMarshmallow4Warning -EXCLUDE = "exclude" -INCLUDE = "include" -RAISE = "raise" + +class UnknownParam: + good_values = ("exclude", "include", "raise") + + def __init__(self, stringval=None, *, value=None, propagate=None): + self.value = value + self.propagate = propagate + + if stringval: + for x in stringval.lower().split("|"): + x = x.strip() + if x in self.good_values and not self.value: + self.value = x + if x == "propagate": + self.propagate = True + + def __or__(self, other): + return UnknownParam( + value=self.value or other.value, propagate=self.propagate or other.propagate + ) + + def __str__(self): + parts = [self.value] if self.value else [] + if self.propagate: + parts.append("propagate") + if self.value or self.propagate: + return "|".join(parts) + return "null" + + def __repr__(self): + return "UnknownParam(value={!r}, propagate={!r})".format( + self.value, self.propagate + ) + + @classmethod + def parse_if_str(cls, value): + """Given a string or UnknownParam, convert to an UnknownParam""" + if isinstance(value, str): + return cls(value) + return value + + +EXCLUDE = UnknownParam("exclude") +INCLUDE = UnknownParam("include") +RAISE = UnknownParam("raise") +PROPAGATE = UnknownParam("propagate") class _Missing: @@ -41,8 +84,7 @@ def __repr__(self): def is_generator(obj) -> bool: - """Return True if ``obj`` is a generator - """ + """Return True if ``obj`` is a generator""" return inspect.isgeneratorfunction(obj) or inspect.isgenerator(obj) @@ -279,8 +321,7 @@ def set_value(dct: typing.Dict[str, typing.Any], key: str, value: typing.Any): def callable_or_raise(obj): - """Check that an object is callable, else raise a :exc:`ValueError`. - """ + """Check that an object is callable, else raise a :exc:`ValueError`.""" if not callable(obj): raise ValueError("Object {!r} is not callable.".format(obj)) return obj diff --git a/tests/test_fields.py b/tests/test_fields.py index c155353fc..71cdca5b3 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -6,6 +6,7 @@ ValidationError, EXCLUDE, INCLUDE, + PROPAGATE, RAISE, missing, ) @@ -317,10 +318,11 @@ def multi_nested_data_with_unknown(self, data_nested_unknown): @pytest.mark.parametrize( "schema_kwargs,load_kwargs", [ - ({}, {"propagate_unknown": True, "unknown": INCLUDE}), - ({"propagate_unknown": True}, {"unknown": INCLUDE}), - ({"propagate_unknown": True, "unknown": INCLUDE}, {}), - ({"unknown": INCLUDE}, {"propagate_unknown": True}), + ({}, {"unknown": INCLUDE | PROPAGATE}), + ({}, {"unknown": "INCLUDE | PROPAGATE"}), + ({"unknown": RAISE}, {"unknown": INCLUDE | PROPAGATE}), + ({"unknown": RAISE}, {"unknown": "include|propagate"}), + ({"unknown": INCLUDE | PROPAGATE}, {}), ], ) def test_propagate_unknown_include( @@ -344,10 +346,11 @@ def test_propagate_unknown_include( @pytest.mark.parametrize( "schema_kwargs,load_kwargs", [ - ({}, {"propagate_unknown": True, "unknown": EXCLUDE}), - ({"propagate_unknown": True}, {"unknown": EXCLUDE}), - ({"propagate_unknown": True, "unknown": EXCLUDE}, {}), - ({"unknown": EXCLUDE}, {"propagate_unknown": True}), + ({}, {"unknown": EXCLUDE | PROPAGATE}), + ({}, {"unknown": "exclude | propagate"}), + ({"unknown": RAISE}, {"unknown": EXCLUDE | PROPAGATE}), + ({"unknown": PROPAGATE | EXCLUDE}, {}), + ({"unknown": "propagate|exclude"}, {}), ], ) def test_propagate_unknown_exclude( diff --git a/tests/test_schema.py b/tests/test_schema.py index 2bdb00ed3..78ac5fa9a 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -17,6 +17,7 @@ EXCLUDE, INCLUDE, RAISE, + PROPAGATE, class_registry, ) from marshmallow.exceptions import ( @@ -2904,7 +2905,7 @@ class DefinitelyUniqueSchema(Schema): def test_propagate_unknown_stops_at_explicit_value_for_nested(): - # propagate_unknown=True should traverse any "auto_unknown" values and + # PROPAGATE should traverse any "auto_unknown" values and # replace them with the "unknown" value from the parent context (schema or # load arguments) # this test makes sure that it stops when a nested field or schema has @@ -2928,7 +2929,7 @@ class Top(Schema): "y": "bye", "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, } - result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + result = Top(unknown=INCLUDE | PROPAGATE).load(data) assert result == { "x": "hi", "y": "bye", @@ -2937,7 +2938,7 @@ class Top(Schema): def test_propagate_unknown_stops_at_explicit_value_for_meta(): - # this is the same as the above test of propagate_unknown stopping where + # this is the same as the above test of unknown propagation stopping where # auto_unknown=False, but it checks that this applies when `unknown` is set # by means of `Meta` @@ -2948,19 +2949,25 @@ class Middle(Schema): x = fields.Str() child = fields.Nested(Bottom) - # set unknown explicitly on a nested field, so auto_unknown will be - # false going into Bottom + # set unknown explicitly here, so auto_unknown will be + # false going into Bottom, and also set propagate to make it propagate + # in this case class Meta: - unknown = EXCLUDE + unknown = EXCLUDE | PROPAGATE class Top(Schema): x = fields.Str() child = fields.Nested(Middle) + # sanity-check that auto-unknown is being set correctly + assert Top().auto_unknown + assert not Top(unknown=INCLUDE | PROPAGATE).auto_unknown + assert not Middle().auto_unknown + data = { "x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, } - result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + result = Top(unknown=INCLUDE | PROPAGATE).load(data) assert result == {"x": "hi", "y": "bye", "child": {"x": "hi", "child": {"x": "hi"}}} From 499d6089afdce71ad364a679a3269fb5b6521bce Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 4 Sep 2020 19:34:34 +0000 Subject: [PATCH 6/6] Make unknown propagation override child schemas In order to simplify, the value of `unknown` will not be respected in child schemas if `PROPAGATE` is used. This removes any need for the `auto_unknown` tracking, so there's less complexity added to schemas in order to implement this behavior. Adjust tests and changelog to match. Also make minor tweaks to ensure UnknownParam usage is consistent and keep the diff with the dev branch smaller. --- CHANGELOG.rst | 20 ++++++-------------- src/marshmallow/base.py | 4 +--- src/marshmallow/fields.py | 17 +++-------------- src/marshmallow/schema.py | 25 ++++--------------------- src/marshmallow/utils.py | 5 ++++- tests/test_schema.py | 36 +++++++++++++++--------------------- 6 files changed, 33 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 165ea3a13..285424729 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,24 +8,16 @@ Features: - The behavior of the ``unknown`` option can be further customized with a new value, ``PROPAGATE``. If ``unknown=EXCLUDE | PROPAGATE`` is set, then the - value of ``unknown=EXCLUDE | PROPAGATE`` will be passed to any nested - schemas which do not explicitly set ``unknown`` in ``Nested`` or schema - options. This works for ``INCLUDE | PROPAGATE`` and ``RAISE | PROPAGATE`` as - well. + value of ``unknown=EXCLUDE | PROPAGATE`` will be passed to any nested schemas. + This works for ``INCLUDE | PROPAGATE`` and ``RAISE | PROPAGATE`` as well. (:issue:`1490`, :issue:`1428`) - Thanks :user:`lmignon` and :user:`mahenzon`. .. note:: - When a schema is being loaded with ``unknown=... | PROPAGATE``, you can still - set ``unknown`` explicitly on child schemas. However, be aware that such a - value may turn off propagation at that point in the schema hierarchy. - - For example, a schema which specifies ``unknown=EXCLUDE`` will set - ``EXCLUDE`` for itself. But because the value is ``EXCLUDE`` rather than - ``EXCLUDE | PROPAGATE``, that setting will not be propagated to its - children, even if there is a parent schema which sets - ``unknown=EXCLUDE | PROPAGATE``. + When a schema is being loaded with ``unknown=... | PROPAGATE``, this will + override any values set for ``unknown`` in child schemas. Therefore, + ``PROPAGATE`` should only be used in cases in which you want to change + the behavior of an entire schema heirarchy. 3.7.1 (2020-07-20) ****************** diff --git a/src/marshmallow/base.py b/src/marshmallow/base.py index e3cbde749..6e739b350 100644 --- a/src/marshmallow/base.py +++ b/src/marshmallow/base.py @@ -38,9 +38,7 @@ def dump(self, obj, *, many: bool = None): def dumps(self, obj, *, many: bool = None): raise NotImplementedError - def load( - self, data, *, many: bool = None, partial=None, unknown=None - ): + def load(self, data, *, many: bool = None, partial=None, unknown=None): raise NotImplementedError def loads( diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index b62d9d090..e16065051 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -494,7 +494,7 @@ def __init__( self.only = only self.exclude = exclude self.many = many - self.unknown = UnknownParam.parse_if_str(unknown) if unknown else None + self.unknown = UnknownParam.parse_if_str(unknown) self._schema = None # Cached Schema instance super().__init__(default=default, **kwargs) @@ -575,7 +575,7 @@ def _test_collection(self, value): def _load(self, value, data, partial=None, unknown=None): try: valid_data = self.schema.load( - value, unknown=unknown if unknown else self.unknown, partial=partial, + value, unknown=unknown or self.unknown, partial=partial, ) except ValidationError as error: raise ValidationError( @@ -593,18 +593,7 @@ def _deserialize(self, value, attr, data, partial=None, unknown=None, **kwargs): Add ``partial`` parameter. """ self._test_collection(value) - # check if self.unknown or self.schema.unknown is set - # however, we should only respect `self.schema.unknown` if - # `auto_unknown` is False, meaning that it was set explicitly on the - # schema class or instance - explicit_unknown = ( - self.unknown - if self.unknown - else (self.schema.unknown if not self.schema.auto_unknown else None) - ) - if explicit_unknown: - unknown = explicit_unknown - return self._load(value, data, partial=partial, unknown=unknown,) + return self._load(value, data, partial=partial, unknown=unknown) class Pluck(Nested): diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index 4770fd533..a2b2d0f7e 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -228,15 +228,7 @@ def __init__(self, meta, ordered: bool = False): self.include = getattr(meta, "include", {}) self.load_only = getattr(meta, "load_only", ()) self.dump_only = getattr(meta, "dump_only", ()) - # self.unknown defaults to "RAISE", but note whether it was explicit or - # not, so that when we're handling "propagate" we can decide - # whether or not to propagate based on whether or not it was set - # explicitly - self.unknown = getattr(meta, "unknown", None) - self.auto_unknown = self.unknown is None - if self.auto_unknown: - self.unknown = RAISE - self.unknown = UnknownParam.parse_if_str(self.unknown) + self.unknown = UnknownParam.parse_if_str(getattr(meta, "unknown", RAISE)) self.register = getattr(meta, "register", True) @@ -397,14 +389,7 @@ def __init__( self.load_only = set(load_only) or set(self.opts.load_only) self.dump_only = set(dump_only) or set(self.opts.dump_only) self.partial = partial - self.unknown = ( - UnknownParam.parse_if_str(unknown) - if unknown is not None - else self.opts.unknown - ) - # if unknown was not set explicitly AND self.opts.auto_unknown is true, - # then the value should be considered "automatic" - self.auto_unknown = (not unknown) and self.opts.auto_unknown + self.unknown = UnknownParam.parse_if_str(unknown) or self.opts.unknown self.context = context or {} self._normalize_nested_options() #: Dictionary mapping field_names -> :class:`Field` objects @@ -775,7 +760,7 @@ def loads( if invalid data are passed. """ data = self.opts.render_module.loads(json_data, **kwargs) - return self.load(data, many=many, partial=partial, unknown=unknown,) + return self.load(data, many=many, partial=partial, unknown=unknown) def _run_validator( self, @@ -857,9 +842,7 @@ def _do_load( error_store = ErrorStore() errors = {} # type: typing.Dict[str, typing.List[str]] many = self.many if many is None else bool(many) - unknown = UnknownParam.parse_if_str( - unknown if unknown is not None else self.unknown - ) + unknown = UnknownParam.parse_if_str(unknown or self.unknown) if partial is None: partial = self.partial # Run preprocessors diff --git a/src/marshmallow/utils.py b/src/marshmallow/utils.py index 3eab1f46e..3a99a015f 100644 --- a/src/marshmallow/utils.py +++ b/src/marshmallow/utils.py @@ -51,7 +51,10 @@ def __repr__(self): @classmethod def parse_if_str(cls, value): - """Given a string or UnknownParam, convert to an UnknownParam""" + """Given a string or UnknownParam, convert to an UnknownParam + + Preserves None, which is important for making sure that it can be used + blindly on `unknown` which may be a user-supplied value or a default""" if isinstance(value, str): return cls(value) return value diff --git a/tests/test_schema.py b/tests/test_schema.py index 78ac5fa9a..f8df84b08 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -2904,12 +2904,11 @@ class DefinitelyUniqueSchema(Schema): assert SchemaClass is DefinitelyUniqueSchema -def test_propagate_unknown_stops_at_explicit_value_for_nested(): - # PROPAGATE should traverse any "auto_unknown" values and - # replace them with the "unknown" value from the parent context (schema or - # load arguments) - # this test makes sure that it stops when a nested field or schema has - # "unknown" set explicitly (so auto_unknown=False) +def test_propagate_unknown_overrides_explicit_value_for_nested(): + # PROPAGATE should traverse any schemas and replace them with the + # "unknown" value from the parent context (schema or load arguments) + # this test makes sure that it takes precedence when a nested field + # or schema has "unknown" set explicitly class Bottom(Schema): x = fields.Str() @@ -2933,14 +2932,13 @@ class Top(Schema): assert result == { "x": "hi", "y": "bye", - "child": {"x": "hi", "y": "bye", "child": {"x": "hi"}}, + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, } -def test_propagate_unknown_stops_at_explicit_value_for_meta(): - # this is the same as the above test of unknown propagation stopping where - # auto_unknown=False, but it checks that this applies when `unknown` is set - # by means of `Meta` +def test_propagate_unknown_overrides_explicit_value_for_meta(): + # this is the same as the above test of unknown propagation, but it checks that + # this applies when `unknown` is set by means of `Meta` as well class Bottom(Schema): x = fields.Str() @@ -2949,25 +2947,21 @@ class Middle(Schema): x = fields.Str() child = fields.Nested(Bottom) - # set unknown explicitly here, so auto_unknown will be - # false going into Bottom, and also set propagate to make it propagate - # in this case class Meta: - unknown = EXCLUDE | PROPAGATE + unknown = EXCLUDE class Top(Schema): x = fields.Str() child = fields.Nested(Middle) - # sanity-check that auto-unknown is being set correctly - assert Top().auto_unknown - assert not Top(unknown=INCLUDE | PROPAGATE).auto_unknown - assert not Middle().auto_unknown - data = { "x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, } result = Top(unknown=INCLUDE | PROPAGATE).load(data) - assert result == {"x": "hi", "y": "bye", "child": {"x": "hi", "child": {"x": "hi"}}} + assert result == { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, + }