From 870853f0e81a399ce8adad96689d1868198dd29b Mon Sep 17 00:00:00 2001
From: Maarten-vd-Sande
Date: Mon, 17 Aug 2020 13:33:41 +0200
Subject: [PATCH 1/2] ignore nan values

---
 pandas_schema/validation.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py
index 5f7c763..8b02506 100644
--- a/pandas_schema/validation.py
+++ b/pandas_schema/validation.py
@@ -353,15 +353,19 @@ class IsDistinctValidation(_SeriesValidation):
     Checks that every element of this column is different from each other element
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, ignore_nan=False, **kwargs):
         super().__init__(**kwargs)
+        self.ignore_nan = ignore_nan
 
     @property
     def default_message(self):
         return 'contains values that are not unique'
 
     def validate(self, series: pd.Series) -> pd.Series:
-        return ~series.duplicated(keep='first')
+        if self.ignore_nan:
+            return ~series.duplicated(keep='first') | series.isna()
+        else:
+            return ~series.duplicated(keep='first')
 
 
 class InListValidation(_SeriesValidation):

From 23c321061b6404616b191acace31ce929cad0b18 Mon Sep 17 00:00:00 2001
From: Maarten-vd-Sande
Date: Mon, 17 Aug 2020 13:36:32 +0200
Subject: [PATCH 2/2] add docstring

---
 pandas_schema/validation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py
index 8b02506..a98f226 100644
--- a/pandas_schema/validation.py
+++ b/pandas_schema/validation.py
@@ -354,6 +354,10 @@ class IsDistinctValidation(_SeriesValidation):
     """
 
     def __init__(self, ignore_nan=False, **kwargs):
+        """
+        :param ignore_nan: Whether or not to ignore nan values when checking for distinct values in a column.
+        """
+
         super().__init__(**kwargs)
         self.ignore_nan = ignore_nan
 