From 071e8cce82a366b807afec9bdddf17b6aec75544 Mon Sep 17 00:00:00 2001 From: Ethan Cartwright Date: Fri, 22 Dec 2023 13:09:46 -0500 Subject: [PATCH] fix type annotation errors --- .../src/datahub_classify/infotype_utils.py | 13 +++++++------ .../src/datahub_classify/reference_input.py | 4 +++- datahub-classify/tests/exclude_name_test_config.py | 8 ++++++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/datahub-classify/src/datahub_classify/infotype_utils.py b/datahub-classify/src/datahub_classify/infotype_utils.py index 75e4eb2..cf741c3 100644 --- a/datahub-classify/src/datahub_classify/infotype_utils.py +++ b/datahub-classify/src/datahub_classify/infotype_utils.py @@ -1,6 +1,6 @@ import logging import re -from typing import Any, Dict, List +from typing import Any, Dict, List, Union from datahub_classify.constants import ( EXCLUDE_NAME, @@ -92,7 +92,7 @@ def detect_named_entity_spacy( def perform_basic_checks( metadata: Metadata, values: List[Any], - config_dict: Dict[str, Dict], + config_dict: Dict[str, Union[Dict, List[str], None]], infotype: str, minimum_values_threshold: int, ) -> bool: @@ -102,8 +102,11 @@ def perform_basic_checks( if not config_dict.get("strip_formatting") else strip_formatting(metadata.name) ) + prediction_factors = config_dict.get(PREDICTION_FACTORS_AND_WEIGHTS) + exclude_name = config_dict.get(EXCLUDE_NAME, []) if ( - config_dict[PREDICTION_FACTORS_AND_WEIGHTS].get(VALUES, None) + isinstance(prediction_factors, dict) + and prediction_factors.get(VALUES, None) and len(values) < minimum_values_threshold ): logger.warning( @@ -111,9 +114,7 @@ def perform_basic_checks( f"does not meet minimum threshold for {infotype}" ) basic_checks_status = False - elif config_dict[EXCLUDE_NAME] is not None and metadata.name in config_dict.get( - EXCLUDE_NAME, set() - ): + elif exclude_name is not None and metadata.name in exclude_name: logger.warning(f"Excluding match for {infotype} on column {metadata.name}") basic_checks_status = False # TODO: Add more basic checks diff --git a/datahub-classify/src/datahub_classify/reference_input.py b/datahub-classify/src/datahub_classify/reference_input.py index 972ab63..777fb08 100644 --- a/datahub-classify/src/datahub_classify/reference_input.py +++ b/datahub-classify/src/datahub_classify/reference_input.py @@ -1,6 +1,8 @@ +from typing import Any, Dict, List, Union + # Input Dictionary Format -input1 = { +input1: Dict[str, Dict[str, Union[Dict[str, Any], List[str], None]]] = { "Email_Address": { "Prediction_Factors_and_Weights": { "Name": 0.4, diff --git a/datahub-classify/tests/exclude_name_test_config.py b/datahub-classify/tests/exclude_name_test_config.py index a20b0d4..07b03fc 100644 --- a/datahub-classify/tests/exclude_name_test_config.py +++ b/datahub-classify/tests/exclude_name_test_config.py @@ -1,6 +1,10 @@ +from typing import Any, Dict, List, Union + # Input Dictionary Format -exclude_name_test_config = { +exclude_name_test_config: Dict[ + str, Dict[str, Union[Dict[str, Any], List[str], None]] +] = { "Email_Address": { "Prediction_Factors_and_Weights": { "Name": 1, @@ -29,7 +33,7 @@ }, } -none_exclude_name_test_config = { +none_exclude_name_test_config: Dict[str, Dict[str, Union[Dict[str, Any], List[str], None]]] = { # type: ignore "Email_Address": { "Prediction_Factors_and_Weights": { "Name": 1,