Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add command to import postcode areas and code to display on choropleth #202

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion hub/data_imports/geocoding_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,12 @@ async def get_postcode_data_for_area(area, loaders, steps):
# Try a few other backup strategies (example postcode, another geocoder)
# to get postcodes.io data
if postcode_data is None:
postcode = await get_example_postcode_from_area_gss(area.gss)
try:
postcode = await get_example_postcode_from_area_gss(area.gss)
except Exception as e:
logger.error(f"Failed to get example postcode for {area.gss}: {e}")
postcode = None

steps.append(
{
"task": "postcode_from_area",
Expand Down
18 changes: 15 additions & 3 deletions hub/graphql/types/model_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,11 @@ def generic_data_for_hub(self, hostname: str) -> List["GenericData"]:
async def sample_postcode(
self, info: Info[HubDataLoaderContext]
) -> Optional[PostcodesIOResult]:
return await get_postcode_data_for_gss(self.gss)
# return await info.context.area_coordinate_loader.load(self.point)
try:
return await get_postcode_data_for_gss(self.gss)
except Exception as e:
logger.error(f"Failed to get sample postcode for gss {self.gss}: {e}")
return None


@strawberry.type
Expand Down Expand Up @@ -748,6 +751,10 @@ def query_filter(self) -> dict[str, str]:
AnalyticalAreaType.msoa: AreaTypeFilter(lih_area_type="MSOA"),
AnalyticalAreaType.lsoa: AreaTypeFilter(lih_area_type="LSOA"),
AnalyticalAreaType.output_area: AreaTypeFilter(lih_area_type="OA21"),
AnalyticalAreaType.postcode: AreaTypeFilter(lih_area_type="PC"),
AnalyticalAreaType.postcode_area: AreaTypeFilter(lih_area_type="PCA"),
AnalyticalAreaType.postcode_district: AreaTypeFilter(lih_area_type="PCD"),
AnalyticalAreaType.postcode_sector: AreaTypeFilter(lih_area_type="PCS"),
}


Expand Down Expand Up @@ -1711,12 +1718,17 @@ def choropleth_data_for_source(
)

# Get the required data for the source
gss_field = (
"postcode_data__postcode"
if analytical_area_key == AnalyticalAreaType.postcode
else f"postcode_data__codes__{analytical_area_key.value}"
)
qs = (
external_data_source.get_import_data()
.filter(postcode_data__codes__isnull=False)
.annotate(
label=F(f"postcode_data__{analytical_area_key.value}"),
gss=F(f"postcode_data__codes__{analytical_area_key.value}"),
gss=F(gss_field),
latitude=Cast("postcode_data__latitude", output_field=FloatField()),
longitude=Cast("postcode_data__longitude", output_field=FloatField()),
)
Expand Down
31 changes: 31 additions & 0 deletions hub/management/commands/export_postcode_areas_as_geojsonl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import logging
from pathlib import Path

from django.conf import settings

# from django postgis
from django.core.management.base import BaseCommand

from tqdm import tqdm

from hub.models import Area

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Export postcode sector, district and area geometries as GeoJSONL.

    For each of the area type codes ``PCS``, ``PCD`` and ``PCA`` a file named
    ``<code>.geojsonl`` is written to ``settings.BASE_DIR / "data"``. Each
    line of a file is the stored ``Area.geometry`` value of one area of that
    type.
    """

    help = "Export Postcodes to GeoJSONl (for Mapbox tileset source creation)"

    def handle(self, *args, **options):
        """Write one GeoJSONL file per postcode grouping level."""
        area_type_codes = ["PCS", "PCD", "PCA"]

        output_dir: Path = settings.BASE_DIR / "data"
        # Create the target directory up front so a fresh checkout does not
        # fail on the first open().
        output_dir.mkdir(parents=True, exist_ok=True)

        for area_type_code in area_type_codes:
            output_file: Path = output_dir / f"{area_type_code}.geojsonl"
            area_geojsons = Area.objects.filter(
                area_type__code=area_type_code
            ).values_list("geometry", flat=True)

            with output_file.open("w", encoding="utf-8") as f:
                # Stream rows rather than caching the whole queryset: each
                # geometry is a full serialized feature and can be large.
                # count() keeps the progress bar's total.
                for geojson in tqdm(
                    area_geojsons.iterator(), total=area_geojsons.count()
                ):
                    # Areas without a stored geometry would raise on write
                    # (None) or emit a blank line (""), so skip them.
                    if not geojson:
                        continue
                    f.write(geojson)
                    f.write("\n")
2 changes: 1 addition & 1 deletion hub/management/commands/import_msoas_and_lsoas.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def import_area(self, area, area_type, property_prefix):
if isinstance(polygon, Polygon):
polygon = MultiPolygon([polygon])

geom["geometry"] = polygon.json
geom["geometry"] = json.loads(polygon.json)

a.geometry = json.dumps(geom)
a.polygon = polygon
Expand Down
2 changes: 1 addition & 1 deletion hub/management/commands/import_output_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def import_area(self, area, area_type):
# Transform the geometry
polygon.transform(transform)

geom["geometry"] = polygon.json
geom["geometry"] = json.loads(polygon.json)

a.geometry = json.dumps(geom)
a.polygon = polygon
Expand Down
2 changes: 1 addition & 1 deletion hub/management/commands/import_output_areas_scotland.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def import_area(self, area, area_type):
if isinstance(polygon, Polygon):
polygon = MultiPolygon([polygon])

geom["geometry"] = polygon.json
geom["geometry"] = json.loads(polygon.json)
a.geometry = json.dumps(geom)
a.polygon = polygon
a.point = a.polygon.centroid
Expand Down
207 changes: 207 additions & 0 deletions hub/management/commands/import_postcode_areas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import json
import logging
import re
from pathlib import Path

from django.conf import settings
from django.contrib.gis.db.models import Union as GisUnion

# from django postgis
from django.contrib.gis.geos import GEOSGeometry, MultiPolygon, Polygon
from django.core.management.base import BaseCommand
from django.db.models import F, Q
from django.db.models.expressions import Expression, RawSQL
from django.db.models.functions import Length, Substr

from tqdm import tqdm

from hub.models import Area, AreaType

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Import postcode units from GeoJSONL and derive larger postcode groupings.

    Postcode units (area type ``PC``) are read from
    ``data/postcodes_1.geojsonl`` .. ``data/postcodes_10.geojsonl``. Sectors
    (``PCS``), districts (``PCD``) and areas (``PCA``) are then built by
    unioning the polygons of the postcode units that share a prefix.
    """

    help = "Import Postcodes from GeoJSON"

    def add_arguments(self, parser):
        """Register the optional ``--only`` level selector."""
        parser.add_argument(
            "-o",
            "--only",
            # Reject unknown levels up front: without this, a typo makes the
            # command silently do nothing.
            choices=["PC", "PCS", "PCD", "PCA"],
            help="""
            Pass one of {PC, PCS, PCD, PCA} to only import that level of area.
            Note that postcode units (PC) must be imported first.
            """,
        )

    def handle(self, only=None, *args, **options):
        """Run every import level, or just the one named by ``only``."""
        # Order matters: the groupings are derived from the postcode units.
        importers = {
            "PC": self.import_postcode_units,  # E.G. N14 7LU
            "PCS": self.import_postcode_sectors,  # E.G. N14 7
            "PCD": self.import_postcode_districts,  # E.G. N14
            "PCA": self.import_postcode_areas,  # E.G. N
        }
        for code, importer in importers.items():
            if not only or only == code:
                importer()

    def import_postcode_units(self):
        """Import individual postcodes from the ten GeoJSONL data files.

        Stops at the first missing file, after printing where to get it.
        """
        self.stdout.write("Importing postcode units")
        filepaths: list[Path] = [
            settings.BASE_DIR / "data" / f"postcodes_{i}.geojsonl" for i in range(1, 11)
        ]

        # Looked up lazily (once the first file is known to exist) and then
        # reused, instead of being queried again for every file.
        area_type = None

        for filepath in filepaths:
            if not filepath.exists():
                self.stdout.write(
                    f'Missing {filepath.name}. Download from the Mapped MinIO console, "postcodes" bucket.'
                )
                return

            self.stdout.write(f"Importing postcode file {filepath.name} of 10")

            if area_type is None:
                area_type, _ = AreaType.objects.get_or_create(
                    name="Postcodes",
                    code="PC",
                    area_type="Postcode",
                    description="Postcodes",
                )

            # JSON text is UTF-8; do not depend on the locale's default.
            data = filepath.read_text(encoding="utf-8")
            for line in tqdm(re.split(r"\r?\n", data)):
                if line.strip():
                    self.import_area(json.loads(line), area_type)

    def import_area(self, area, area_type):
        """Create or update the ``Area`` for one postcode GeoJSON feature.

        ``area`` is a parsed feature whose ``properties["POSTCODE"]`` is used
        as both the gss code and the name. Geometry is only parsed and saved
        when this area does not already have a polygon.
        """
        gss = area["properties"]["POSTCODE"]
        name = gss

        # Scoped to the area type so that an unrelated area sharing the same
        # code cannot cause this postcode's geometry to be skipped.
        geom_already_loaded = Area.objects.filter(
            gss=gss, area_type=area_type, polygon__isnull=False
        ).exists()

        a, _ = Area.objects.update_or_create(
            gss=gss,
            area_type=area_type,
            defaults={"name": name},
        )

        if geom_already_loaded:
            # Only process geometry data if required, to speed things up
            return

        polygon = GEOSGeometry(json.dumps(area["geometry"]))
        if isinstance(polygon, Polygon):
            # Normalise single polygons so every area stores a MultiPolygon.
            polygon = MultiPolygon([polygon])

        geom = {
            "type": "Feature",
            "geometry": json.loads(polygon.json),
            "properties": {
                **area["properties"],
                "code": gss,
                "name": name,
                "type": area_type.code,
            },
        }

        a.geometry = json.dumps(geom)
        a.polygon = polygon
        a.point = polygon.centroid
        a.save()

    def import_postcode_sectors(self):
        """Build postcode sectors: the postcode minus its last two characters."""
        self.stdout.write("Importing postcode sectors")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Sectors",
            code="PCS",
            area_type="Postcode Sector",
            description="Postcode sectors",
        )
        self.import_postcode_grouping(
            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 2)
        )

    def import_postcode_districts(self):
        """Build postcode districts: the postcode minus its last four characters."""
        self.stdout.write("Importing postcode districts")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Districts",
            code="PCD",
            area_type="Postcode District",
            description="Postcode districts",
        )
        self.import_postcode_grouping(
            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 4)
        )

    def import_postcode_areas(self):
        """Build postcode areas: the first run of letters in the postcode."""
        self.stdout.write("Importing postcode areas")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Areas",
            code="PCA",
            area_type="Postcode Area",
            description="Postcode areas",
        )
        self.import_postcode_grouping(
            area_type,
            prefix_expression=RawSQL("SUBSTRING(gss FROM '[a-zA-Z]+')", tuple()),
        )

    def import_postcode_grouping(
        self, area_type: AreaType, prefix_expression: Expression
    ):
        """
        Import postcodes grouped by prefix, calculated by the provided expression, into
        the provided area type.

        For every distinct prefix, the polygons of the matching postcode units
        are unioned and stored as one ``Area`` of ``area_type`` whose gss and
        name are the prefix. Prefixes that already have a polygon for this
        area type are skipped.
        """
        # Only consider postcodes that contain a space
        # The others are "vertical streets" which are not relevant
        postcode_units = Area.objects.filter(
            Q(gss__contains=" ") & Q(area_type__code="PC")
        ).annotate(prefix=prefix_expression)

        # An explicit ordering on the selected column keeps any default model
        # ordering out of the SELECT, where it would defeat distinct().
        prefixes = (
            postcode_units.order_by("prefix")
            .values_list("prefix", flat=True)
            .distinct()
        )

        for prefix in tqdm(prefixes):
            if not prefix:
                # The expression produced NULL/empty (e.g. the regex did not
                # match), so there is no code to store the grouping under.
                continue

            geom_already_loaded = Area.objects.filter(
                gss=prefix, area_type=area_type, polygon__isnull=False
            ).exists()
            if geom_already_loaded:
                continue

            # Reuse the filtered base query so the union is built from the
            # same postcodes that the prefixes were derived from.
            polygon = postcode_units.filter(prefix=prefix).aggregate(
                union=GisUnion("polygon")
            )["union"]

            if polygon is None:
                # None of the member postcodes has a polygon yet.
                logger.warning(
                    "No geometry to union for %s %s; skipping",
                    area_type.code,
                    prefix,
                )
                continue

            if isinstance(polygon, Polygon):
                polygon = MultiPolygon([polygon])

            geom = {
                "type": "Feature",
                "geometry": json.loads(polygon.json),
                "properties": {
                    "code": prefix,
                    "name": prefix,
                    "type": area_type.code,
                },
            }

            Area.objects.update_or_create(
                gss=prefix,
                area_type=area_type,
                defaults={
                    "name": prefix,
                    "geometry": json.dumps(geom),
                    "polygon": polygon,
                    "point": polygon.centroid,
                },
            )
38 changes: 10 additions & 28 deletions local_intelligence_hub/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,34 +443,16 @@
},
},
"loggers": {
"procrastinate": (
{
"level": "DEBUG",
"handlers": ["console"],
"class": "logging.StreamHandler",
"formatter": "procrastinate",
}
if ENVIRONMENT != "production"
else {
"handlers": ["truncated"],
"level": "DEBUG",
}
),
# Silence endless waiting for job log
"procrastinate.worker": (
{
"level": "DEBUG",
"handlers": ["console"],
"class": "logging.StreamHandler",
"formatter": "procrastinate",
}
if ENVIRONMENT != "production"
else {
"handlers": ["truncated"],
"level": "INFO",
"propagate": False,
}
),
"procrastinate": {
"handlers": ["truncated"],
"level": "DEBUG",
},
# Silence endless waiting for job log on prod
"procrastinate.worker.wait_for_job": {
"handlers": ["console"],
"level": "INFO" if ENVIRONMENT == "production" else "DEBUG",
"propagate": False,
},
"django": {
"handlers": ["console"],
"level": DJANGO_LOG_LEVEL,
Expand Down
5 changes: 4 additions & 1 deletion nextjs/src/__generated__/graphql.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import { useRouter } from 'next/navigation'
import pluralize from 'pluralize'
import { useEffect, useState } from 'react'
import { toast } from 'sonner'
import toSpaceCase from 'to-space-case'
import { CREATE_MAP_REPORT } from '../../../reports/ReportList/CreateReportCard'
import ExternalDataSourceBadCredentials from './ExternalDataSourceBadCredentials'
import { ManageSourceSharing } from './ManageSourceSharing'
Expand Down Expand Up @@ -304,7 +305,7 @@ export default function InspectExternalDataSource({
{dataType === DataSourceType.Member
? 'Membership list'
: dataType
? pluralize(dataType.toLowerCase())
? pluralize(toSpaceCase(dataType.toLowerCase()))
: 'Data source'}
<span>&nbsp;&#x2022;&nbsp;</span>
{crmInfo?.name || crmType}
Expand Down
Loading
Loading