Skip to content

Commit

Permalink
Merge branch 'fix/anonymization_support' of https://github.com/ydataai/ydata-sdk into fix/anonymization_support
Browse files Browse the repository at this point in the history

# Conflicts:
#	src/ydata/sdk/synthesizers/anonymizer.py
#	src/ydata/sdk/synthesizers/synthesizer.py
  • Loading branch information
fabclmnt committed Sep 6, 2024
2 parents 93654da + 3653bb8 commit 309b2ee
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
2 changes: 1 addition & 1 deletion examples/synthesizers/anonymize_example.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
import os

from ydata.sdk.dataset import get_dataset
from ydata.sdk.synthesizers import RegularSynthesizer

# Do not forget to add your token as env variables
os.environ["YDATA_TOKEN"] = '<TOKEN>' # Remove if already defined


def main():
"""In this example, we demonstrate how to train a synthesizer from a pandas
DataFrame.
Expand Down
14 changes: 8 additions & 6 deletions src/ydata/sdk/synthesizers/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@

from ydata.datascience.common import AnonymizerType

def build_and_validate_anonimization(anonimyze:dict, cols: list) -> dict:
isnested = any(isinstance(i,dict) for i in anonimyze.values())

def built_and_validate_anonimization(anonimyze: dict, cols: list) -> dict:
isnested = any(isinstance(i, dict) for i in anonimyze.values())

if not all([True if k in cols else False for k in list(anonimyze.keys())]):
#AnonymizationConfigurationError
raise Exception('The keys in your configuration must exactly match the column names in the provided dataset. Please check and update your inputs to ensure they align.')
# AnonymizationConfigurationError
raise Exception(
'The keys in your configuration must exactly match the column names in the provided dataset. Please check and update your inputs to ensure they align.')

if isnested:
# Validate the format here.
Expand All @@ -32,8 +34,8 @@ def build_and_validate_anonimization(anonimyze:dict, cols: list) -> dict:
config = anonimyze
else:
config = {}
for k,v in anonimyze.items():
print(k,v)
for k, v in anonimyze.items():
print(k, v)
if AnonymizerType.get_anonymizer_type(v) is None:
col_config = {'type': AnonymizerType.REGEX.value, 'regex': v}
else:
Expand Down
8 changes: 4 additions & 4 deletions src/ydata/sdk/synthesizers/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@
from ydata.sdk.synthesizers._models.status import PrepareState, Status, TrainingState
from ydata.sdk.synthesizers._models.synthesizer import Synthesizer as mSynthesizer
from ydata.sdk.synthesizers._models.synthesizers_list import SynthesizersList
from ydata.sdk.synthesizers.anonymizer import built_and_validate_anonimization
from ydata.sdk.utils.model_mixin import ModelFactoryMixin

from ydata.sdk.synthesizers.anonymizer import build_and_validate_anonimization


@typechecked
class BaseSynthesizer(ABC, ModelFactoryMixin):
Expand Down Expand Up @@ -248,8 +247,9 @@ def _fit_from_datasource(
payload['type'] = str(datatype.value)

if anonymize is not None:
#process and validated the anonymization config shared by the end user
anonymize = build_and_validate_anonimization(anonimyze=anonymize, cols=[col.name for col in X.metadata.columns])
# process and validate the anonymization config shared by the end user
anonymize = built_and_validate_anonimization(
anonimyze=anonymize, cols=[col.name for col in X.metadata.columns])
payload["extraData"]["anonymize"] = anonymize
if condition_on is not None:
payload["extraData"]["condition_on"] = condition_on
Expand Down

0 comments on commit 309b2ee

Please sign in to comment.