From 19cec1285f3fe825c71b5e8f79eb73fd93df541b Mon Sep 17 00:00:00 2001 From: Frank Date: Sat, 22 Feb 2020 19:52:52 -0500 Subject: [PATCH 1/4] Adds contingent validation with support for optional columns --- pandas_schema/column.py | 5 ++++- pandas_schema/validation.py | 4 +++- test/test_validation.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pandas_schema/column.py b/pandas_schema/column.py index 199b883..5416cc0 100644 --- a/pandas_schema/column.py +++ b/pandas_schema/column.py @@ -5,7 +5,9 @@ from .validation_warning import ValidationWarning class Column: - def __init__(self, name: str, validations: typing.Iterable['validation._BaseValidation'] = [], allow_empty=False): + def __init__(self, name: str, validations: typing.Iterable['validation._BaseValidation'] = [], + allow_empty=False, + optional=False): """ Creates a new Column object @@ -16,6 +18,7 @@ def __init__(self, name: str, validations: typing.Iterable['validation._BaseVali self.name = name self.validations = list(validations) self.allow_empty = allow_empty + self.optional = optional def validate(self, series: pd.Series) -> typing.List[ValidationWarning]: """ diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py index 5f7c763..de034a0 100644 --- a/pandas_schema/validation.py +++ b/pandas_schema/validation.py @@ -90,7 +90,9 @@ def get_errors(self, series: pd.Series, column: 'column.Column'): validated = ~series.isnull() & simple_validation else: validated = (series.str.len() > 0) & simple_validation - + elif column.optional: + if bool(series.isnull().all()) or list(series.unique()) == ['']: + validated = [] else: validated = simple_validation diff --git a/test/test_validation.py b/test/test_validation.py index fc40100..b4f2851 100644 --- a/test/test_validation.py +++ b/test/test_validation.py @@ -660,6 +660,18 @@ def test_in_range_allow_empty_false_with_error(self): errors = validator.get_errors(pd.Series(self.vals), Column('', allow_empty=False)) 
self.assertEqual(len(errors), len(self.vals)) + def test_in_range_optional_missing(self): + validator = InRangeValidation(min=0) + errors = validator.get_errors(pd.Series(), Column('', optional=True)) + + self.assertEqual(len(errors), 0) + + def test_in_range_optional_with_error(self): + validator = InRangeValidation(min=4) + errors = validator.get_errors(pd.Series(self.vals), Column('', optional=False)) + + self.assertEqual(len(errors), len(self.vals)) + class PandasDtypeTests(ValidationTestBase): """ From 06ae5a121c9a79dcb884b81dcf8fb0da45864419 Mon Sep 17 00:00:00 2001 From: Frank Date: Sat, 22 Feb 2020 20:00:34 -0500 Subject: [PATCH 2/4] Allow optional columns to be missing from data --- pandas_schema/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas_schema/schema.py b/pandas_schema/schema.py index 5c0442e..f8a4c26 100644 --- a/pandas_schema/schema.py +++ b/pandas_schema/schema.py @@ -73,7 +73,7 @@ def validate(self, df: pd.DataFrame, columns: typing.List[str] = None) -> typing for column in columns_to_pair: # Throw an error if the schema column isn't in the data frame - if column.name not in df: + if column.name not in df and not column.optional: errors.append(ValidationWarning( 'The column {} exists in the schema but not in the data frame'.format(column.name))) return errors From ca38653e5820df30aac1ad710ecce4d1fbd8ef47 Mon Sep 17 00:00:00 2001 From: Frank Date: Sun, 23 Feb 2020 14:03:36 -0500 Subject: [PATCH 3/4] Fix --- pandas_schema/validation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py index de034a0..db4e7d8 100644 --- a/pandas_schema/validation.py +++ b/pandas_schema/validation.py @@ -90,9 +90,8 @@ def get_errors(self, series: pd.Series, column: 'column.Column'): validated = ~series.isnull() & simple_validation else: validated = (series.str.len() > 0) & simple_validation - elif column.optional: - if bool(series.isnull().all()) or 
list(series.unique()) == ['']: - validated = [] + elif column.optional and bool(series.isnull().all()) or list(series.unique()) == ['']: + validated = [] else: validated = simple_validation From df32cb2910907d7f7077e306f453d2beecd5015b Mon Sep 17 00:00:00 2001 From: Frank Date: Sun, 23 Feb 2020 14:21:59 -0500 Subject: [PATCH 4/4] Fix operator precedence in optional-column check --- pandas_schema/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py index db4e7d8..6b8ba14 100644 --- a/pandas_schema/validation.py +++ b/pandas_schema/validation.py @@ -90,7 +90,7 @@ def get_errors(self, series: pd.Series, column: 'column.Column'): validated = ~series.isnull() & simple_validation else: validated = (series.str.len() > 0) & simple_validation - elif column.optional and bool(series.isnull().all()) or list(series.unique()) == ['']: + elif column.optional and (bool(series.isnull().all()) or list(series.unique()) == ['']): validated = [] else: validated = simple_validation