From 136eb6e31be8e2962a0483b18e1ca7efbdb9fc1d Mon Sep 17 00:00:00 2001 From: Michael Wood Date: Thu, 12 Dec 2024 12:40:16 +0000 Subject: [PATCH] wip --- cove/cove_360/fixtures/duration_errors.json | 26 +++ .../components/explore/usefulness.html | 6 +- cove/cove_360/templatetags/cove_tags.py | 9 +- cove/cove_360/views.py | 13 +- lib360dataquality/additional_test.py | 110 ++++++++++++ lib360dataquality/check_field_present.py | 4 +- lib360dataquality/cove/threesixtygiving.py | 159 ++++-------------- 7 files changed, 190 insertions(+), 137 deletions(-) create mode 100644 cove/cove_360/fixtures/duration_errors.json create mode 100644 lib360dataquality/additional_test.py diff --git a/cove/cove_360/fixtures/duration_errors.json b/cove/cove_360/fixtures/duration_errors.json new file mode 100644 index 0000000..87507d1 --- /dev/null +++ b/cove/cove_360/fixtures/duration_errors.json @@ -0,0 +1,26 @@ +{ + "grants": [ + { + "id": "360G-sampletrust-105177/Z/14/Z", + "awardDate": "2024-12-30", + "amountAwarded": 10, + "url": "http://example.com", + "title": "test", + "currency": "GBP", + "description": "test", + "recipientOrganization": [ + { + "id": "GB-323242-test", + "name": "Example Project Limited" + } + ], + "plannedDates": [], + "fundingOrganization": [ + { + "id": "GB-323242-test", + "name": "Example Project Limited" + } + ] + } + ] +} diff --git a/cove/cove_360/templates/cove_360/components/explore/usefulness.html b/cove/cove_360/templates/cove_360/components/explore/usefulness.html index eec2d8d..b586102 100644 --- a/cove/cove_360/templates/cove_360/components/explore/usefulness.html +++ b/cove/cove_360/templates/cove_360/components/explore/usefulness.html @@ -34,7 +34,7 @@

{{category}}

- + @@ -45,7 +45,9 @@

{{category}}

{% trans 'Passed' %}{% trans 'Failed' %} {% trans 'Check Description' %} {% trans 'First 3 Locations' %}
-
+
+ {{message.percentage|multiply:100}}% +
diff --git a/cove/cove_360/templatetags/cove_tags.py b/cove/cove_360/templatetags/cove_tags.py index 8ac0908..68d8c78 100644 --- a/cove/cove_360/templatetags/cove_tags.py +++ b/cove/cove_360/templatetags/cove_tags.py @@ -40,4 +40,11 @@ def cove_360_modal_list(**context): @register.filter("multiply") def multiply(a, b): - return a*b + """ Multiply a,b: if the result is less than one output 1 decimal place, otherwise a rounded int""" + res = a*b + + if res < 1: + return f"{(a*b):.1f}" + + return int(round(res)) + diff --git a/cove/cove_360/views.py b/cove/cove_360/views.py index 1d86808..5929bd5 100644 --- a/cove/cove_360/views.py +++ b/cove/cove_360/views.py @@ -207,8 +207,17 @@ def explore_360(request, pk, template='cove_360/explore.html'): import pprint pprint.pprint(context, stream=open("/tmp/dqt.py", "w"), indent=2) - context["usefulness_categories"] = set([message["category"] for message, a, b in context["usefulness_checks"]]) - context["quality_accuracy_categories"] = set([message["category"] for message, a, b in context["quality_accuracy_checks"]]) + try: + context["usefulness_categories"] = set([message["category"] for message, a, b in context["usefulness_checks"]]) + except TypeError: + # if no usefulness_checks the iteration will fail + context["usefulness_categories"] = [] + + try: + context["quality_accuracy_categories"] = set([message["category"] for message, a, b in context["quality_accuracy_checks"]]) + except TypeError: + # if no quality_accuracy_checks the iteration will fail + context["quality_accuracy_categories"] = [] try: context["quality_accuracy_checks_passed"] = create_passed_tests_context_data(context["quality_accuracy_checks"], TEST_CLASSES["quality_accuracy"]) diff --git a/lib360dataquality/additional_test.py b/lib360dataquality/additional_test.py new file mode 100644 index 0000000..b37adb1 --- /dev/null +++ b/lib360dataquality/additional_test.py @@ -0,0 +1,110 @@ + +class TestType(object): + QUALITY_TEST_CLASS = 
"quality_accuracy" + USEFULNESS_TEST_CLASS = "usefulness" + + +class TestRelevance(object): + RECIPIENT_ANY = "" + RECIPIENT_ORGANISATION = "recipient organisation" + RECIPIENT_INDIVIDUAL = "recipient individual" + + +class TestCategories(object): + GRANTS = "Grants" + ORGANISATIONS = "Organisations" + DATA_PROTECTION = "Data Protection" + DATES = "Dates" + LOCATION = "Location" + METADATA = "Metadata" + + +class AdditionalTest(object): + category = TestCategories.GRANTS + + def __init__(self, **kw): + self.grants = kw["grants"] + self.aggregates = kw["aggregates"] + self.grants_percentage = 0 + self.json_locations = [] + self.failed = False + self.count = 0 + self.heading = None + self.message = None + # Default to the most common type + self.relevant_grant_type = TestRelevance.RECIPIENT_ANY + + def process(self, grant, path_prefix): + pass + + def produce_message(self): + return { + "heading": self.heading, + "message": self.message, + "type": self.__class__.__name__, + "count": self.count, + "percentage": self.grants_percentage, + "category": self.__class__.category, + } + + def get_heading_count(self, test_class_type): + # The total grants is contextual e.g. 
a test may fail for a recipient org-id + # this is only relevant to grants to organisations and not individuals + if self.relevant_grant_type == TestRelevance.RECIPIENT_ANY: + total = self.aggregates["count"] + elif self.relevant_grant_type == TestRelevance.RECIPIENT_ORGANISATION: + total = self.aggregates["count"] - self.aggregates["recipient_individuals_count"] + elif self.relevant_grant_type == TestRelevance.RECIPIENT_INDIVIDUAL: + # if there are no individuals in this data then reset the count + if self.aggregates["recipient_individuals_count"] == 0: + self.count = 0 + total = self.aggregates["recipient_individuals_count"] + + # Guard against a division by 0 + if total < 1: + total = 1 + + self.grants_percentage = self.count / total + + # Return conditions + + if test_class_type == TestType.QUALITY_TEST_CLASS: + return self.count + + if self.aggregates["count"] == 1 and self.count == 1: + self.grants_percentage = 1.0 + return f"1 {self.relevant_grant_type}".strip() + + if self.count <= 5: + return f"{self.count} {self.relevant_grant_type}".strip() + + return f"{round(self.grants_percentage*100)}% of {self.relevant_grant_type}".strip() + + def format_heading_count(self, message, test_class_type=None, verb="have"): + """Build a string with count of grants plus message + + The grant count phrase for the test is pluralized and + prepended to message, eg: 1 grant has + message, + 2 grants have + message or 3 grants contain + message. + """ + noun = "grant" if self.count == 1 else "grants" + + # Positive result - "what is working well" + # Avoid double negative + if not message.startswith("not have") and message.startswith("not") and self.count == 0: + message = message[len("not"):] + if message.startswith("not have") and self.count == 0: + verb = "do" + # End positive result flip + + if verb == "have": + verb = "has" if self.count == 1 else verb + elif verb == "do": + verb = "does" if self.count == 1 else verb + else: + # Naively! 
+ verb = verb + "s" if self.count == 1 else verb + + return "{} {} {} {}".format( + self.get_heading_count(test_class_type), noun, verb, message + ) diff --git a/lib360dataquality/check_field_present.py b/lib360dataquality/check_field_present.py index 27087fa..794bbe7 100644 --- a/lib360dataquality/check_field_present.py +++ b/lib360dataquality/check_field_present.py @@ -1,4 +1,4 @@ -from lib360dataquality.cove.threesixtygiving import AdditionalTest, RECIPIENT_INDIVIDUAL +from lib360dataquality.additional_test import AdditionalTest, TestRelevance from functools import wraps @@ -109,7 +109,7 @@ class IndividualsCodeListsNotPresent(FieldNotPresentBase): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_INDIVIDUAL + self.relevant_grant_type = TestRelevance.RECIPIENT_INDIVIDUAL def check_field(self, grant): # Not relevant diff --git a/lib360dataquality/cove/threesixtygiving.py b/lib360dataquality/cove/threesixtygiving.py index e4900d3..6d339e1 100644 --- a/lib360dataquality/cove/threesixtygiving.py +++ b/lib360dataquality/cove/threesixtygiving.py @@ -14,6 +14,8 @@ from libcove.lib.common import common_checks_context, get_additional_codelist_values, get_orgids_prefixes, validator from libcove.lib.tools import decimal_default from rangedict import RangeDict as range_dict +from lib360dataquality.additional_test import AdditionalTest, TestType, TestCategories, TestRelevance +from lib360dataquality.check_field_present import PlannedDurationNotPresent try: from django.utils.html import mark_safe @@ -23,8 +25,6 @@ def mark_safe(string): return string -QUALITY_TEST_CLASS = "quality_accuracy" -USEFULNESS_TEST_CLASS = "usefulness" DATES_JSON_LOCATION = { "award_date": "/awardDate", @@ -392,7 +392,7 @@ def common_checks_360( # If no particular test classes are supplied then run all defined here if not test_classes: - test_classes = [QUALITY_TEST_CLASS, USEFULNESS_TEST_CLASS] + test_classes = [TestType.QUALITY_TEST_CLASS, 
TestType.USEFULNESS_TEST_CLASS] if context["file_type"] == "xlsx": try: @@ -515,110 +515,6 @@ def flatten_dict(grant, path=""): yield ("{}/{}".format(path, key), value) -RECIPIENT_ANY = "" -RECIPIENT_ORGANISATION = "recipient organisation" -RECIPIENT_INDIVIDUAL = "recipient individual" - - -class TestCategories(object): - GRANTS = "Grants" - ORGANISATIONS = "Organisations" - DATA_PROTECTION = "Data Protection" - DATES = "Dates" - LOCATION = "Location" - METADATA = "Metadata" - - -class AdditionalTest: - category = TestCategories.GRANTS - - def __init__(self, **kw): - self.grants = kw["grants"] - self.aggregates = kw["aggregates"] - self.grants_percentage = 0 - self.json_locations = [] - self.failed = False - self.count = 0 - self.heading = None - self.message = None - # Default to the most common type - self.relevant_grant_type = RECIPIENT_ANY - - def process(self, grant, path_prefix): - pass - - def produce_message(self): - return { - "heading": self.heading, - "message": self.message, - "type": self.__class__.__name__, - "count": self.count, - "percentage": self.grants_percentage, - "category": self.__class__.category, - } - - def get_heading_count(self, test_class_type): - # The total grants is contextual e.g. 
a test may fail for a recipient org-id - # this is only relevant to grants to organisations and not individuals - if self.relevant_grant_type == RECIPIENT_ANY: - total = self.aggregates["count"] - elif self.relevant_grant_type == RECIPIENT_ORGANISATION: - total = self.aggregates["count"] - self.aggregates["recipient_individuals_count"] - elif self.relevant_grant_type == RECIPIENT_INDIVIDUAL: - # if there are no individuals in this data then reset the count - if self.aggregates["recipient_individuals_count"] == 0: - self.count = 0 - total = self.aggregates["recipient_individuals_count"] - - # Guard against a division by 0 - if total < 1: - total = 1 - - self.grants_percentage = self.count / total - - # Return conditions - - if test_class_type == QUALITY_TEST_CLASS: - return self.count - - if self.aggregates["count"] == 1 and self.count == 1: - self.grants_percentage = 1.0 - return f"1 {self.relevant_grant_type}".strip() - - if self.count <= 5: - return f"{self.count} {self.relevant_grant_type}".strip() - - return f"{round(self.grants_percentage*100)}% of {self.relevant_grant_type}".strip() - - def format_heading_count(self, message, test_class_type=None, verb="have"): - """Build a string with count of grants plus message - - The grant count phrase for the test is pluralized and - prepended to message, eg: 1 grant has + message, - 2 grants have + message or 3 grants contain + message. - """ - noun = "grant" if self.count == 1 else "grants" - - # Positive result - "what is working well" - # Avoid double negative - if not message.startswith("not have") and message.startswith("not") and self.count == 0: - message = message[len("not"):] - if message.startswith("not have") and self.count == 0: - verb = "do" - # End positive result flip - - if verb == "have": - verb = "has" if self.count == 1 else verb - elif verb == "do": - verb = "does" if self.count == 1 else verb - else: - # Naively! 
- verb = verb + "s" if self.count == 1 else verb - - return "{} {} {} {}".format( - self.get_heading_count(test_class_type), noun, verb, message - ) - class ZeroAmountTest(AdditionalTest): """Check if any grants have an amountAwarded of 0. @@ -652,7 +548,7 @@ def process(self, grant, path_prefix): self.heading = mark_safe( self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) ) self.message = self.check_text["message"][self.grants_percentage] @@ -684,7 +580,7 @@ class RecipientOrg360GPrefix(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -760,7 +656,7 @@ class RecipientOrgUnrecognisedPrefix(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -783,7 +679,7 @@ def process(self, grant, path_prefix): self.heading = mark_safe( self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) ) self.message = self.check_text["message"][self.grants_percentage] @@ -830,7 +726,7 @@ def process(self, grant, path_prefix): self.heading = mark_safe( self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) ) self.message = self.check_text["message"][self.grants_percentage] @@ -863,7 +759,7 @@ class RecipientOrgCharityNumber(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = 
TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -888,7 +784,7 @@ def process(self, grant, path_prefix): self.heading = mark_safe( self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) ) self.message = self.check_text["message"][self.grants_percentage] @@ -922,7 +818,7 @@ class RecipientOrgCompanyNumber(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -944,7 +840,7 @@ def process(self, grant, path_prefix): self.heading = mark_safe( self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) ) self.message = mark_safe(self.check_text["message"][self.grants_percentage]) @@ -973,7 +869,7 @@ class NoRecipientOrgCompanyCharityNumber(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -1026,7 +922,7 @@ class IncompleteRecipientOrg(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): try: @@ -1135,7 +1031,7 @@ def process(self, grant, path_prefix): self.heading = self.format_heading_count( self.check_text["heading"], - test_class_type=QUALITY_TEST_CLASS, + test_class_type=TestType.QUALITY_TEST_CLASS, verb="contain", ) self.message = self.check_text["message"][self.grants_percentage] @@ -1278,7 +1174,7 @@ class GrantIdUnexpectedChars(AdditionalTest): def __init__(self, 
**kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ANY + self.relevant_grant_type = TestRelevance.RECIPIENT_ANY def process(self, grant, path_prefix): if "\n" in grant.get("id"): @@ -1287,7 +1183,7 @@ def process(self, grant, path_prefix): self.count += 1 self.heading = self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) self.message = self.check_text["message"][self.grants_percentage] @@ -1310,7 +1206,7 @@ class OrganizationIdUnexpectedChars(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): for org_type in ("fundingOrganization", "recipientOrganization"): @@ -1327,7 +1223,7 @@ def process(self, grant, path_prefix): self.count += 1 self.heading = self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) self.message = self.check_text["message"][self.grants_percentage] @@ -1356,7 +1252,7 @@ class OrganizationIdLooksInvalid(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_ORGANISATION + self.relevant_grant_type = TestRelevance.RECIPIENT_ORGANISATION def process(self, grant, path_prefix): for org_type in ("fundingOrganization", "recipientOrganization"): @@ -1379,7 +1275,7 @@ def process(self, grant, path_prefix): self.count += 1 self.heading = self.format_heading_count( - self.check_text["heading"], test_class_type=QUALITY_TEST_CLASS + self.check_text["heading"], test_class_type=TestType.QUALITY_TEST_CLASS ) self.message = self.check_text["message"][self.grants_percentage] @@ -1775,7 +1671,7 @@ class RecipientIndWithoutToIndividualsDetails(AdditionalTest): def __init__(self, 
**kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_INDIVIDUAL + self.relevant_grant_type = TestRelevance.RECIPIENT_INDIVIDUAL def process(self, grant, path_prefix): if "recipientIndividual" in grant and "toIndividualsDetails" not in grant: @@ -1807,7 +1703,7 @@ class RecipientIndDEI(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_INDIVIDUAL + self.relevant_grant_type = TestRelevance.RECIPIENT_INDIVIDUAL def process(self, grant, path_prefix): if "recipientIndividual" in grant and "project" in grant: @@ -1851,7 +1747,7 @@ class GeoCodePostcode(AdditionalTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.relevant_grant_type = RECIPIENT_INDIVIDUAL + self.relevant_grant_type = TestRelevance.RECIPIENT_INDIVIDUAL def process(self, grant, path_prefix): if "recipientIndividual" in grant: @@ -1868,8 +1764,10 @@ def process(self, grant, path_prefix): self.message = self.check_text["message"][self.grants_percentage] +# Default tests run in CoVE, these are also the base list +# for the Quality Dashboard checks. TEST_CLASSES = { - QUALITY_TEST_CLASS: [ + TestType.QUALITY_TEST_CLASS: [ ZeroAmountTest, FundingOrgUnrecognisedPrefix, RecipientOrgUnrecognisedPrefix, @@ -1890,7 +1788,7 @@ def process(self, grant, path_prefix): RecipientIndDEI, GeoCodePostcode, ], - USEFULNESS_TEST_CLASS: [ + TestType.USEFULNESS_TEST_CLASS: [ RecipientOrg360GPrefix, FundingOrg360GPrefix, NoRecipientOrgCompanyCharityNumber, @@ -1902,6 +1800,7 @@ def process(self, grant, path_prefix): NoLastModified, NoDataSource, RecipientIndWithoutToIndividualsDetails, + PlannedDurationNotPresent, ], }