Skip to content

Commit

Permalink
add location data to elasticsearch
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Feb 7, 2024
1 parent 54f2747 commit cf039f2
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 22 deletions.
52 changes: 40 additions & 12 deletions ftc/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ class OrganisationGroup(Document):
source = fields.KeywordField()
locations = fields.KeywordField()
search_scale = fields.FloatField()
hq_ward = fields.KeywordField()
hq_localauthority = fields.KeywordField()
hq_region = fields.KeywordField()
hq_country = fields.KeywordField()

@classmethod
def search(cls, using=None, index=None):
Expand Down Expand Up @@ -181,9 +185,11 @@ def prepare_sortname(self, instance):

def prepare_domain(self, instance):
    """Return the distinct domains of the organisation's URLs.

    Every value from ``instance.get_all("url")`` is passed through
    ``get_domain``; ``None`` results are dropped and duplicates are
    collapsed, keeping the first occurrence of each domain.
    """
    domains = (get_domain(link) for link in instance.get_all("url"))
    # dict.fromkeys de-duplicates like set() but keeps insertion order, so
    # indexing the same record twice produces the same list.
    return list(dict.fromkeys(d for d in domains if d is not None))

Expand All @@ -196,6 +202,18 @@ def prepare_organisationTypePrimary(self, instance):
def prepare_locations(self, instance):
    """Return the value indexed in ``locations``: the instance's geocodes."""
    geocodes = instance.geocodes
    return geocodes

def prepare_hq_ward(self, instance):
    """Return the value indexed in ``hq_ward``.

    Delegates to ``instance.hq_region`` with the ``"ward"`` area type.
    """
    ward = instance.hq_region("ward")
    return ward

def prepare_hq_localauthority(self, instance):
    """Return the value indexed in ``hq_localauthority``.

    Delegates to ``instance.hq_region`` with the ``"laua"`` area type.
    """
    local_authority = instance.hq_region("laua")
    return local_authority

def prepare_hq_region(self, instance):
    """Return the value indexed in ``hq_region``.

    Delegates to ``instance.hq_region`` with the ``"rgn"`` area type.
    """
    region = instance.hq_region("rgn")
    return region

def prepare_hq_country(self, instance):
    """Return the value indexed in ``hq_country``.

    Delegates to ``instance.hq_region`` with the ``"ctry"`` area type.
    """
    country = instance.hq_region("ctry")
    return country

def prepare_search_scale(self, instance):
    """Return the value indexed in ``search_scale``, read from the instance."""
    scale = instance.search_scale
    return scale

Expand Down Expand Up @@ -261,6 +279,10 @@ class CompanyDocument(Document):
RegAddress_PostCode = fields.KeywordField(attr="RegAddress_PostCode")
CompanyCategory = fields.KeywordField(attr="CompanyCategory")
PreviousNames = fields.TextField()
RegAddress_Ward = fields.KeywordField(attr="RegAddress_Ward")
RegAddress_LocalAuthority = fields.KeywordField(attr="RegAddress_LocalAuthority")
RegAddress_Region = fields.KeywordField(attr="RegAddress_Region")
RegAddress_Country = fields.KeywordField(attr="RegAddress_Country")

@classmethod
def search(cls, using=None, index=None):
Expand All @@ -282,22 +304,28 @@ def get_queryset(self):
"""
return self.django.model.objects.raw(
"""
with names as (
select "CompanyNumber",
json_agg("CompanyName") as "names"
from "companies_previousname"
group by "CompanyNumber"
WITH names AS (
SELECT "CompanyNumber",
json_agg("CompanyName") AS "names"
FROM "companies_previousname"
GROUP BY "CompanyNumber"
)
SELECT c."CompanyNumber",
c."CompanyName",
c."CompanyStatus",
c."RegAddress_PostCode",
c."CompanyCategory",
names.names as "PreviousNames"
names.names AS "PreviousNames",
p.ward AS "RegAddress_Ward",
p.laua AS "RegAddress_LocalAuthority",
p.rgn AS "RegAddress_Region",
p.ctry AS "RegAddress_Country"
FROM "companies_company" c
left outer join names
on c."CompanyNumber" = names."CompanyNumber"
"""
LEFT OUTER JOIN names
ON c."CompanyNumber" = names."CompanyNumber"
LEFT OUTER JOIN geo_postcode p
ON c."RegAddress_PostCode" = p.pcds
"""
)

def get_indexing_queryset(self):
Expand Down
16 changes: 14 additions & 2 deletions ftc/models/related_organisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ def source_ids(self):
)
return list(set(sources))

@cached_property
def locations(self):
    """Return the ``OrganisationLocation`` rows for any of this group's org IDs.

    Cached on the instance, so repeated access reuses the same queryset.
    """
    matching = OrganisationLocation.objects.filter(org_id__in=self.orgIDs)
    return matching

def hq_region(self, areatype):
    """Return the ``geo_<areatype>`` code of the registered office.

    Args:
        areatype: Suffix of the ``geo_*`` attribute to read from a
            location, e.g. ``"ward"``, ``"laua"``, ``"rgn"`` or ``"ctry"``.

    Returns:
        The first non-empty value found on a location whose type is
        ``REGISTERED_OFFICE``, or ``None`` when no such value exists.
    """
    registered_office = OrganisationLocation.LocationTypes.REGISTERED_OFFICE
    field = f"geo_{areatype}"
    for location in self.locations:
        if location.locationType != registered_office:
            continue
        value = getattr(location, field, None)
        # self.locations can hold more than one registered-office record;
        # keep looking when this one has no code for the area type.
        if value:
            return value
    return None

@cached_property
def geocodes(self):
location_fields = [
Expand All @@ -91,8 +104,7 @@ def geocodes(self):
# "geo_lep2",
]
geocodes = set()
locations = OrganisationLocation.objects.filter(org_id__in=self.orgIDs)
for location in locations:
for location in self.locations:
for field in location_fields:
value = getattr(location, field, None)
if value and not value.endswith("999999"):
Expand Down
26 changes: 18 additions & 8 deletions reconcile/companies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from findthatcharity.utils import normalise_name
from ftc.documents import CompanyDocument

# Reconciliation type attached to every company result. The id has no
# leading slash, matching the slugified company-category ids.
COMPANY_RECON_TYPE = {
    "id": "registered-company",
    "name": "Registered Company",
}


def do_reconcile_query(
query,
orgtypes="all",
type_="/Organization",
type_: list[str] = [],
limit=5,
properties=[],
type_strict="should",
Expand All @@ -19,7 +19,7 @@ def do_reconcile_query(
if not query:
return {result_key: []}

properties = {p["pid"]: p["v"] for p in properties} if properties else {}
properties = {p.pid: p.v for p in properties} if properties else {}

search_dict = {
"query": {
Expand All @@ -31,6 +31,14 @@ def do_reconcile_query(
"size": limit,
}

if type_ and "registered-company" not in type_:
search_dict["query"] = {
"bool": {
"must": search_dict["query"],
"filter": {"terms": {"CompanyCategory.keyword": type_}},
},
}

if properties.get("postcode"):
# boost the score if the postcode matches
search_dict["query"] = {
Expand All @@ -41,7 +49,7 @@ def do_reconcile_query(
"filter": {
"match": {"RegAddress_PostCode": properties["postcode"]}
},
"weight": 1.5,
"weight": 5,
}
],
"boost_mode": "multiply",
Expand All @@ -63,14 +71,16 @@ def do_reconcile_query(
"type": [COMPANY_RECON_TYPE]
+ [
{
"id": "/{}".format(slugify(o.CompanyCategory)),
"id": slugify(o.CompanyCategory),
"name": o.CompanyCategory,
}
],
"score": o.meta.score,
"match": (normalise_name(o.CompanyName) == normalise_name(query))
and (o.meta.score == result.hits.max_score)
and (k == 0),
"match": (
(normalise_name(o.CompanyName) == normalise_name(query))
and (o.meta.score == result.hits.max_score)
and (k == 0)
),
}
for k, o in enumerate(result)
],
Expand Down
2 changes: 2 additions & 0 deletions reconcile/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def do_extend_query(ids, properties):
("aoo", CCEWCharityAreaOfOperation, {"linked_charity_number": 0}),
)

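# TODO(review): add location data here — nothing is implemented for it yet;
# the loop below only handles the tables listed above.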

for table, model, default_filters in tables:
if ccew_fields.get(table):
for r in model.objects.filter(
Expand Down

0 comments on commit cf039f2

Please sign in to comment.