Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Other management for unicity error? #27

Merged
merged 2 commits
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions checkcel/checkcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _log_debug_failures(self):

def _log_validator_failures(self):
for field_name, validator in self.validators.items():
if validator.bad:
if validator.bad['invalid_set'] or validator.bad['invalid_unique']:
self.error(
" {} failed {} time(s) ({:.1%}) on field: '{}'".format(
validator.__class__.__name__,
Expand All @@ -59,17 +59,34 @@ def _log_validator_failures(self):
field_name,
)
)
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
data = validator.bad
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
if validator.bad['invalid_set']:
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
data = validator.bad
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
self.error(
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
)
except TypeError as e:
raise e

if validator.bad['invalid_unique']:
self.error(
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
" The following values failed unicity check: ".format(
)
)
except TypeError as e:
raise e
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
for key, values in validator.bad['invalid_unique'].items():
wrong_rows = ", ".join([str(val) for val in values])
self.error(
" Value: '{}' in rows: [{}]".format(key, wrong_rows)
)
except TypeError as e:
raise e

def _log_missing_validators(self):
self.error(" Missing validators for:")
Expand Down
34 changes: 17 additions & 17 deletions checkcel/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ class Validator(object):

def __init__(self, empty_ok=None, ignore_case=None, ignore_space=None, empty_ok_if=None, empty_ok_unless=None, readme=None, unique=None, na_ok=None, skip_generation=None, skip_validation=None):
self.logger = logs.logger
self.invalid_dict = defaultdict(set)
self.invalid_dict = {
"invalid_set": set(),
"invalid_rows": set(),
"invalid_unique": defaultdict(set)
}

self.fail_count = 0
self.empty_ok = empty_ok
self.na_ok = na_ok
Expand Down Expand Up @@ -188,8 +193,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -254,8 +258,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -371,8 +374,7 @@ def validate(self, field, row_number, row):
)
if field and self.unique:
if str(field) in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(str(field))

Expand Down Expand Up @@ -473,8 +475,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -586,8 +587,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -696,8 +696,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -776,8 +775,7 @@ def validate(self, field, row_number, row):
raise ValidationException(e)
if self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -998,8 +996,7 @@ def validate(self, field, row_number, row):
if key not in self.unique_values:
self.unique_values.add(key)
else:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
if self.unique_with:
raise ValidationException(
"'{}' is already in the column (unique with: {})".format(
Expand Down Expand Up @@ -1102,6 +1099,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -1246,6 +1244,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -1341,6 +1340,7 @@ def validate(self, field, row_number, row):
raise ValidationException("{} is not a valid GPS coordinate")
if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down
Loading