Skip to content

Commit

Permalink
Adding complex examples + using ext ngjson
Browse files Browse the repository at this point in the history
  • Loading branch information
leandrodamascena committed Dec 20, 2023
1 parent e0f4a3a commit fbed1a1
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 31 deletions.
32 changes: 22 additions & 10 deletions aws_lambda_powertools/utilities/_data_masking/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import functools
import logging
import warnings
from numbers import Number
from typing import Any, Callable, Mapping, Optional, Sequence, Union, overload

from jsonpath_ng import parse
from jsonpath_ng.ext import parse

from aws_lambda_powertools.utilities._data_masking.exceptions import (
DataMaskingFieldNotFoundError,
Expand Down Expand Up @@ -47,7 +48,7 @@ def lambda_handler(event, context):
def __init__(
self,
provider: Optional[BaseProvider] = None,
raise_on_missing_field: bool = False,
raise_on_missing_field: bool = True,
):
self.provider = provider or BaseProvider()
# NOTE: we depend on Provider to not confuse customers in passing the same 2 serializers in 2 places
Expand Down Expand Up @@ -236,19 +237,30 @@ def _apply_action_to_fields(
for field_parse in fields:
# Parse the field expression using a 'parse' function.
json_parse = parse(field_parse)
# Find the corresponding keys in the normalized data using the parsed expression.
result_parse = json_parse.find(data_parsed)

if self.raise_on_missing_field:
# Customer wants to raise exception when field is not found
# Find the corresponding data in the normalized data using the parsed expression.
result_parse = json_parse.find(data_parsed)

# If the data for the field is not found, raise an exception.
if not result_parse:
if not result_parse:
if self.raise_on_missing_field:
# If the data for the field is not found, raise an exception.
raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}")
else:
# If the data for the field is not found, warning.
warnings.warn(f"Field or expression {field_parse} not found in {data_parsed}", stacklevel=2)

# For in-place updates, json_parse accepts a callback function
# that receives 3 args: field_value, fields, field_name
# We create a partial callback to pre-populate known provider options (action, provider opts, enc ctx)
update_callback = functools.partial(
self._call_action,
action=action,
provider_options=provider_options,
**encryption_context,
)

json_parse.update(
data_parsed,
lambda field_value, fields, field_name: update_callback(field_value, fields, field_name),
lambda field_value, fields, field_name: update_callback(field_value, fields, field_name), # noqa: B023
)

return data_parsed
Expand Down
37 changes: 18 additions & 19 deletions docs/utilities/data_masking.md
Original file line number Diff line number Diff line change
Expand Up @@ -309,24 +309,6 @@ Here are common scenarios to best visualize how to use `fields`.
--8<-- "examples/data_masking/src/choosing_payload_complex_nested_keys_output.json"
```

=== "Accessing list index"

You want to obfuscate data under the `street` field located at index 1 (the second element) of the address list.

=== "Data"

> Expression: `data_masker.mask(data, fields=["address[1].street"])`

```json hl_lines="12"
--8<-- "examples/data_masking/src/choosing_payload_list_index.json"
```

=== "Result"

```json hl_lines="12"
--8<-- "examples/data_masking/src/choosing_payload_list_index_output.json"
```

=== "All fields in a list"

You want to obfuscate data under the `street` field located at any index of the address list.
Expand All @@ -353,7 +335,7 @@ Here are common scenarios to best visualize how to use `fields`.

> Expression: `data_masker.mask(data, fields=["address[-1].street"])`

```json hl_lines="8 12 16"
```json hl_lines="16"
--8<-- "examples/data_masking/src/choosing_payload_list_slice.json"
```

Expand All @@ -363,6 +345,23 @@ Here are common scenarios to best visualize how to use `fields`.
--8<-- "examples/data_masking/src/choosing_payload_list_slice_output.json"
```

=== "Complex expressions"

You want to obfuscate data by searching for fields that match a conditional expression.

=== "Data"

> Expression: `data_masker.mask(data, fields=["$.address[?(@.postcode > 81846)]"])`

```json hl_lines="8 12"
--8<-- "examples/data_masking/src/choosing_payload_complex_search.json"
```

=== "Result"

```json hl_lines="8 12"
--8<-- "examples/data_masking/src/choosing_payload_complex_search_output.json"
```
For comprehensive guidance on using JSONPath syntax, please refer to the official documentation available at [jsonpath-ng](https://github.com/h2non/jsonpath-ng#jsonpath-syntax){target="_blank" rel="nofollow"}.

#### JSON
Expand Down
19 changes: 19 additions & 0 deletions examples/data_masking/src/choosing_payload_complex_search.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "Lessa",
"operation": "non sensitive",
"card_number": "1000 4444 333 2222",
"address": [
{
"postcode": 81847,
"street": "38986 Joanne Stravenue"
},
{
"postcode": 91034,
"street": "14987 Avenue 1"
},
{
"postcode": 78495,
"street": "34452 Avenue 10"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "Lessa",
"operation": "non sensitive",
"card_number": "1000 4444 333 2222",
"address": [
{
"postcode": 81847,
"street": "*****"
},
{
"postcode": 91034,
"street": "*****"
},
{
"postcode": 78495,
"street": "34452 Avenue 10"
}
]
}
78 changes: 78 additions & 0 deletions tests/functional/data_masking/test_aws_encryption_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,81 @@ def test_encrypt_with_complex_dict(data_masker):

# THEN the result is only the specified fields are masked
assert decrypted_data == json.loads(data)


def test_encrypt_with_slice(data_masker):
    """Encrypting and then decrypting with the `address[-1]` expression round-trips the payload."""
    # GIVEN a JSON string describing a dictionary that contains a list of addresses
    addresses = [
        {
            "postcode": 81847,
            "street": "38986 Joanne Stravenue",
            "country": "United States",
            "timezone": "America/La_Paz",
        },
        {
            "postcode": 94400,
            "street": "623 Kraig Mall",
            "country": "United States",
            "timezone": "America/Mazatlan",
        },
        {
            "postcode": 94480,
            "street": "123 Kraig Mall",
            "country": "United States",
            "timezone": "America/Mazatlan",
        },
    ]
    payload = {
        "name": "Leandro",
        "operation": "non sensitive",
        "card_number": "1000 4444 333 2222",
        "address": addresses,
    }
    serialized = json.dumps(payload)

    # The same field expression is used for both the encrypt and decrypt calls
    target_fields = ["address[-1]"]

    # WHEN the payload is encrypted and the ciphertext is decrypted again
    ciphertext = data_masker.encrypt(serialized, fields=target_fields)
    restored = data_masker.decrypt(ciphertext, fields=target_fields)

    # THEN the decrypted result equals the parsed original payload
    assert restored == json.loads(serialized)


def test_encrypt_with_complex_search(data_masker):
    """Encrypt with a filter expression, decrypt with a slice expression, and expect the original payload back."""
    # GIVEN a JSON string describing a dictionary that contains a list of addresses
    addresses = [
        {
            "postcode": 81847,
            "street": "38986 Joanne Stravenue",
            "country": "United States",
            "timezone": "America/La_Paz",
        },
        {
            "postcode": 94400,
            "street": "623 Kraig Mall",
            "country": "United States",
            "timezone": "America/Mazatlan",
        },
        {
            "postcode": 94480,
            "street": "123 Kraig Mall",
            "country": "United States",
            "timezone": "America/Mazatlan",
        },
    ]
    payload = {
        "name": "Leandro",
        "operation": "non sensitive",
        "card_number": "1000 4444 333 2222",
        "address": addresses,
    }
    serialized = json.dumps(payload)

    # Encryption uses a conditional filter on postcode; decryption uses a different (slice) expression
    encrypt_fields = ["$.address[?(@.postcode > 81847)]"]
    decrypt_fields = ["address[1:3]"]

    # WHEN the payload is encrypted and the ciphertext is decrypted again
    ciphertext = data_masker.encrypt(serialized, fields=encrypt_fields)
    restored = data_masker.decrypt(ciphertext, fields=decrypt_fields)

    # THEN the decrypted result equals the parsed original payload
    assert restored == json.loads(serialized)
5 changes: 3 additions & 2 deletions tests/unit/data_masking/test_unit_data_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_parsing_nonexistent_fields_with_raise_on_missing_field():
data_masker.mask(data, ["'3'..True"])


def test_parsing_nonexistent_fields_without_raise_on_missing_field():
def test_parsing_nonexistent_fields_warning_on_missing_field():
# GIVEN a dict data type

data_masker = DataMasking(raise_on_missing_field=False)
Expand All @@ -191,7 +191,8 @@ def test_parsing_nonexistent_fields_without_raise_on_missing_field():
}

# WHEN mask is called with a non-existing field
masked_json_string = data_masker.mask(data, fields=["non-existing"])
with pytest.warns(UserWarning, match="Field or expression*"):
masked_json_string = data_masker.mask(data, fields=["non-existing"])

# THEN the "masked" payload is the same of the original
assert masked_json_string == data

0 comments on commit fbed1a1

Please sign in to comment.