diff --git a/getschema/impl.py b/getschema/impl.py
index 266b2ef..bce279c 100644
--- a/getschema/impl.py
+++ b/getschema/impl.py
@@ -288,11 +288,16 @@ def infer_from_file(filename, fmt="json", skip=0, lower=False,
     return schema
 
 
+class DroppedProperty(object):
+    pass
+
+
 def fix_type(
     obj,
     schema,
     dict_path=[],
     on_invalid_property="raise",
+    drop_unknown_properties=False,
     lower=False,
     replace_special=False,
     snake_case=False,
@@ -306,9 +311,12 @@ def fix_type(
         - raise: Raise exception
         - null: Impute with null
         - force: Keep it as is (string)
+    - drop_unknown_properties: True/False
+        If true, the returned object will exclude unknown (sub-)properties
     """
     kwargs = {
         "on_invalid_property": on_invalid_property,
+        "drop_unknown_properties": drop_unknown_properties,
         "lower": lower,
         "replace_special": replace_special,
         "snake_case": snake_case,
@@ -347,6 +355,10 @@ def fix_type(
         cleaned = dict()
         keys = obj.keys()
         for key in keys:
+            # Skip keys for which the schema declares no type at this path.
+            if drop_unknown_properties and not _nested_get(
+                    schema, dict_path + ["properties", key, "type"]):
+                continue
             try:
                 ret = fix_type(obj[key], schema, dict_path + ["properties", key],
                                **kwargs)
diff --git a/tests/test_fix_type.py b/tests/test_fix_type.py
index 11544f4..514c1ca 100644
--- a/tests/test_fix_type.py
+++ b/tests/test_fix_type.py
@@ -292,3 +292,12 @@ def test_undefined_sub_property():
         assert(str(e).startswith("Unknown property found at: ['properties', 'nested_field', 'properties', 'foo']"))
     else:
         raise Exception(f"Supposed to fail with null value.")
+
+    # With drop_unknown_properties the unknown key is omitted, not raised on.
+    cleaned = getschema.fix_type(
+        records[1],
+        schema,
+        on_invalid_property="raise",
+        drop_unknown_properties=True,
+    )
+    assert "foo" not in cleaned["nested_field"]