# Patch summary (free text ahead of the first "diff --git" header).
#
# ftc/documents.py
#   * OrganisationGroup gains hq_ward / hq_localauthority / hq_region /
#     hq_country keyword fields, each filled by a prepare_* method that calls
#     instance.hq_region(<areatype>).
#   * prepare_domain wraps the filtered domains in set() before list(), so
#     duplicates are dropped.
#   * CompanyDocument gains RegAddress_Ward / _LocalAuthority / _Region /
#     _Country, selected from geo_postcode via a LEFT OUTER JOIN on
#     c."RegAddress_PostCode" = p.pcds; SQL keywords are upper-cased.
# ftc/models/related_organisation.py
#   * New cached `locations` property and `hq_region(areatype)`, which returns
#     geo_<areatype> of the first REGISTERED_OFFICE location it iterates over;
#     `geocodes` reuses `self.locations`.
# reconcile/companies.py
#   * Type ids lose their leading "/"; `type_` becomes a list used in a
#     `terms` filter unless it contains "registered-company"; postcode weight
#     goes from 1.5 to 5; properties are read as attributes (p.pid / p.v).
# reconcile/query.py
#   * Adds a "# add location data" comment; no code accompanies it here.
#
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
#   copy. Blank lines match the @@ hunk counts; Python indentation is inferred
#   from syntax; whitespace inside the SQL string literal (including the
#   removed/added closing triple quote) is a best guess. Verify against the
#   target files before applying.
# NOTE(review): CompanyCategory is declared as a KeywordField in
#   ftc/documents.py; confirm the index mapping really has a
#   "CompanyCategory.keyword" sub-field, and that the values passed in `type_`
#   match stored CompanyCategory values rather than their slugified ids.
# NOTE(review): `type_: list[str] = []` is a mutable default argument; it is
#   only read in the hunks shown here.
diff --git a/ftc/documents.py b/ftc/documents.py
index 437fb4b..2984592 100644
--- a/ftc/documents.py
+++ b/ftc/documents.py
@@ -104,6 +104,10 @@ class OrganisationGroup(Document):
     source = fields.KeywordField()
     locations = fields.KeywordField()
     search_scale = fields.FloatField()
+    hq_ward = fields.KeywordField()
+    hq_localauthority = fields.KeywordField()
+    hq_region = fields.KeywordField()
+    hq_country = fields.KeywordField()
 
     @classmethod
     def search(cls, using=None, index=None):
@@ -181,9 +185,11 @@ def prepare_sortname(self, instance):
 
     def prepare_domain(self, instance):
         return list(
-            filter(
-                lambda item: item is not None,
-                [get_domain(link) for link in instance.get_all("url")],
+            set(
+                filter(
+                    lambda item: item is not None,
+                    [get_domain(link) for link in instance.get_all("url")],
+                )
             )
         )
 
@@ -196,6 +202,18 @@ def prepare_organisationTypePrimary(self, instance):
     def prepare_locations(self, instance):
         return instance.geocodes
 
+    def prepare_hq_ward(self, instance):
+        return instance.hq_region("ward")
+
+    def prepare_hq_localauthority(self, instance):
+        return instance.hq_region("laua")
+
+    def prepare_hq_region(self, instance):
+        return instance.hq_region("rgn")
+
+    def prepare_hq_country(self, instance):
+        return instance.hq_region("ctry")
+
     def prepare_search_scale(self, instance):
         return instance.search_scale
 
@@ -261,6 +279,10 @@ class CompanyDocument(Document):
     RegAddress_PostCode = fields.KeywordField(attr="RegAddress_PostCode")
     CompanyCategory = fields.KeywordField(attr="CompanyCategory")
     PreviousNames = fields.TextField()
+    RegAddress_Ward = fields.KeywordField(attr="RegAddress_Ward")
+    RegAddress_LocalAuthority = fields.KeywordField(attr="RegAddress_LocalAuthority")
+    RegAddress_Region = fields.KeywordField(attr="RegAddress_Region")
+    RegAddress_Country = fields.KeywordField(attr="RegAddress_Country")
 
     @classmethod
     def search(cls, using=None, index=None):
@@ -282,22 +304,28 @@ def get_queryset(self):
         """
         return self.django.model.objects.raw(
             """
-            with names as (
-                select "CompanyNumber",
-                    json_agg("CompanyName") as "names"
-                from "companies_previousname"
-                group by "CompanyNumber"
+            WITH names AS (
+                SELECT "CompanyNumber",
+                    json_agg("CompanyName") AS "names"
+                FROM "companies_previousname"
+                GROUP BY "CompanyNumber"
             )
             SELECT c."CompanyNumber",
                 c."CompanyName",
                 c."CompanyStatus",
                 c."RegAddress_PostCode",
                 c."CompanyCategory",
-                names.names as "PreviousNames"
+                names.names AS "PreviousNames",
+                p.ward AS "RegAddress_Ward",
+                p.laua AS "RegAddress_LocalAuthority",
+                p.rgn AS "RegAddress_Region",
+                p.ctry AS "RegAddress_Country"
             FROM "companies_company" c
-                left outer join names
-                    on c."CompanyNumber" = names."CompanyNumber"
-        """
+                LEFT OUTER JOIN names
+                    ON c."CompanyNumber" = names."CompanyNumber"
+                LEFT OUTER JOIN geo_postcode p
+                    ON c."RegAddress_PostCode" = p.pcds
+            """
         )
 
     def get_indexing_queryset(self):
diff --git a/ftc/models/related_organisation.py b/ftc/models/related_organisation.py
index 45d1b05..b0cf44d 100644
--- a/ftc/models/related_organisation.py
+++ b/ftc/models/related_organisation.py
@@ -70,6 +70,19 @@ def source_ids(self):
         )
         return list(set(sources))
 
+    @cached_property
+    def locations(self):
+        return OrganisationLocation.objects.filter(org_id__in=self.orgIDs)
+
+    def hq_region(self, areatype):
+        for location in self.locations:
+            if (
+                location.locationType
+                != OrganisationLocation.LocationTypes.REGISTERED_OFFICE
+            ):
+                continue
+            return getattr(location, f"geo_{areatype}", None)
+
     @cached_property
     def geocodes(self):
         location_fields = [
@@ -91,8 +104,7 @@ def geocodes(self):
             # "geo_lep2",
         ]
         geocodes = set()
-        locations = OrganisationLocation.objects.filter(org_id__in=self.orgIDs)
-        for location in locations:
+        for location in self.locations:
             for field in location_fields:
                 value = getattr(location, field, None)
                 if value and not value.endswith("999999"):
diff --git a/reconcile/companies.py b/reconcile/companies.py
index daf5f81..1df1028 100644
--- a/reconcile/companies.py
+++ b/reconcile/companies.py
@@ -4,13 +4,13 @@
 from findthatcharity.utils import normalise_name
 from ftc.documents import CompanyDocument
 
-COMPANY_RECON_TYPE = {"id": "/registered-company", "name": "Registered Company"}
+COMPANY_RECON_TYPE = {"id": "registered-company", "name": "Registered Company"}
 
 
 def do_reconcile_query(
     query,
     orgtypes="all",
-    type_="/Organization",
+    type_: list[str] = [],
     limit=5,
     properties=[],
     type_strict="should",
@@ -19,7 +19,7 @@ def do_reconcile_query(
     if not query:
         return {result_key: []}
 
-    properties = {p["pid"]: p["v"] for p in properties} if properties else {}
+    properties = {p.pid: p.v for p in properties} if properties else {}
 
     search_dict = {
         "query": {
@@ -31,6 +31,14 @@ def do_reconcile_query(
         "size": limit,
     }
 
+    if type_ and "registered-company" not in type_:
+        search_dict["query"] = {
+            "bool": {
+                "must": search_dict["query"],
+                "filter": {"terms": {"CompanyCategory.keyword": type_}},
+            },
+        }
+
     if properties.get("postcode"):
         # boost the score if the postcode matches
         search_dict["query"] = {
@@ -41,7 +49,7 @@ def do_reconcile_query(
                         "filter": {
                             "match": {"RegAddress_PostCode": properties["postcode"]}
                         },
-                        "weight": 1.5,
+                        "weight": 5,
                     }
                 ],
                 "boost_mode": "multiply",
@@ -63,14 +71,16 @@ def do_reconcile_query(
                 "type": [COMPANY_RECON_TYPE]
                 + [
                     {
-                        "id": "/{}".format(slugify(o.CompanyCategory)),
+                        "id": slugify(o.CompanyCategory),
                         "name": o.CompanyCategory,
                     }
                 ],
                 "score": o.meta.score,
-                "match": (normalise_name(o.CompanyName) == normalise_name(query))
-                and (o.meta.score == result.hits.max_score)
-                and (k == 0),
+                "match": (
+                    (normalise_name(o.CompanyName) == normalise_name(query))
+                    and (o.meta.score == result.hits.max_score)
+                    and (k == 0)
+                ),
             }
             for k, o in enumerate(result)
         ],
diff --git a/reconcile/query.py b/reconcile/query.py
index fd7d8b8..b1ac35d 100644
--- a/reconcile/query.py
+++ b/reconcile/query.py
@@ -128,6 +128,8 @@ def do_extend_query(ids, properties):
         ("aoo", CCEWCharityAreaOfOperation, {"linked_charity_number": 0}),
     )
 
+    # add location data
+
     for table, model, default_filters in tables:
         if ccew_fields.get(table):
             for r in model.objects.filter(