diff --git a/hub/data_imports/geocoding_config.py b/hub/data_imports/geocoding_config.py index ef26d48dd..7d790c48b 100644 --- a/hub/data_imports/geocoding_config.py +++ b/hub/data_imports/geocoding_config.py @@ -374,7 +374,12 @@ async def get_postcode_data_for_area(area, loaders, steps): # Try a few other backup strategies (example postcode, another geocoder) # to get postcodes.io data if postcode_data is None: - postcode = await get_example_postcode_from_area_gss(area.gss) + try: + postcode = await get_example_postcode_from_area_gss(area.gss) + except Exception as e: + logger.error(f"Failed to get example postcode for {area.gss}: {e}") + postcode = None + steps.append( { "task": "postcode_from_area", diff --git a/hub/graphql/types/model_types.py b/hub/graphql/types/model_types.py index d5c94e5fb..3f4debf1e 100644 --- a/hub/graphql/types/model_types.py +++ b/hub/graphql/types/model_types.py @@ -584,8 +584,11 @@ def generic_data_for_hub(self, hostname: str) -> List["GenericData"]: async def sample_postcode( self, info: Info[HubDataLoaderContext] ) -> Optional[PostcodesIOResult]: - return await get_postcode_data_for_gss(self.gss) - # return await info.context.area_coordinate_loader.load(self.point) + try: + return await get_postcode_data_for_gss(self.gss) + except Exception as e: + logger.error(f"Failed to get sample postcode for gss {self.gss}: {e}") + return None @strawberry.type @@ -748,6 +751,10 @@ def query_filter(self) -> dict[str, str]: AnalyticalAreaType.msoa: AreaTypeFilter(lih_area_type="MSOA"), AnalyticalAreaType.lsoa: AreaTypeFilter(lih_area_type="LSOA"), AnalyticalAreaType.output_area: AreaTypeFilter(lih_area_type="OA21"), + AnalyticalAreaType.postcode: AreaTypeFilter(lih_area_type="PC"), + AnalyticalAreaType.postcode_area: AreaTypeFilter(lih_area_type="PCA"), + AnalyticalAreaType.postcode_district: AreaTypeFilter(lih_area_type="PCD"), + AnalyticalAreaType.postcode_sector: AreaTypeFilter(lih_area_type="PCS"), } @@ -1711,12 +1718,17 @@ def 
choropleth_data_for_source( ) # Get the required data for the source + gss_field = ( + "postcode_data__postcode" + if analytical_area_key == AnalyticalAreaType.postcode + else f"postcode_data__codes__{analytical_area_key.value}" + ) qs = ( external_data_source.get_import_data() .filter(postcode_data__codes__isnull=False) .annotate( label=F(f"postcode_data__{analytical_area_key.value}"), - gss=F(f"postcode_data__codes__{analytical_area_key.value}"), + gss=F(gss_field), latitude=Cast("postcode_data__latitude", output_field=FloatField()), longitude=Cast("postcode_data__longitude", output_field=FloatField()), )
diff --git a/hub/management/commands/export_postcode_areas_as_geojsonl.py b/hub/management/commands/export_postcode_areas_as_geojsonl.py
new file mode 100644
index 000000000..2a83b1d26
--- /dev/null
+++ b/hub/management/commands/export_postcode_areas_as_geojsonl.py
@@ -0,0 +1,48 @@
+import logging
+from pathlib import Path
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from tqdm import tqdm
+
+from hub.models import Area
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """
+    Write the stored GeoJSON of every postcode sector, district and area to
+    newline-delimited files under BASE_DIR/data, one file per area type code.
+    """
+
+    help = "Export Postcodes to GeoJSONl (for Mapbox tileset source creation)"
+
+    def handle(self, *args, **options):
+        """Create {code}.geojsonl for each of the PCS, PCD and PCA area types."""
+        area_type_codes = ["PCS", "PCD", "PCA"]
+        output_dir: Path = settings.BASE_DIR / "data"
+        # Don't fail with FileNotFoundError when the data directory is absent
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        for area_type_code in area_type_codes:
+            output_file = output_dir / f"{area_type_code}.geojsonl"
+            area_geojsons = Area.objects.filter(
+                area_type__code=area_type_code
+            ).values_list("geometry", flat=True)
+            written = 0
+            with output_file.open("w", encoding="utf-8") as f:
+                # iterator() streams rows instead of caching every (large)
+                # geometry string on the queryset; count() keeps tqdm's total
+                for geojson in tqdm(
+                    area_geojsons.iterator(), total=area_geojsons.count()
+                ):
+                    # Skip areas with no stored geometry: f.write(None) would
+                    # raise TypeError and leave a truncated file behind
+                    if not geojson:
+                        continue
+                    f.write(geojson)
+                    f.write("\n")
+                    written += 1
+            logger.info(f"Wrote {written} {area_type_code} areas to {output_file}")
diff --git a/hub/management/commands/import_msoas_and_lsoas.py b/hub/management/commands/import_msoas_and_lsoas.py index b26baf76b..099c8d856 100644 --- a/hub/management/commands/import_msoas_and_lsoas.py +++ b/hub/management/commands/import_msoas_and_lsoas.py @@ -107,7 +107,7 @@ def import_area(self, area,
area_type, property_prefix): if isinstance(polygon, Polygon): polygon = MultiPolygon([polygon]) - geom["geometry"] = polygon.json + geom["geometry"] = json.loads(polygon.json) a.geometry = json.dumps(geom) a.polygon = polygon diff --git a/hub/management/commands/import_output_areas.py b/hub/management/commands/import_output_areas.py index 723a83480..b9e05a9c8 100644 --- a/hub/management/commands/import_output_areas.py +++ b/hub/management/commands/import_output_areas.py @@ -88,7 +88,7 @@ def import_area(self, area, area_type): # Transform the geometry polygon.transform(transform) - geom["geometry"] = polygon.json + geom["geometry"] = json.loads(polygon.json) a.geometry = json.dumps(geom) a.polygon = polygon diff --git a/hub/management/commands/import_output_areas_scotland.py b/hub/management/commands/import_output_areas_scotland.py index aa797d091..59d95247c 100644 --- a/hub/management/commands/import_output_areas_scotland.py +++ b/hub/management/commands/import_output_areas_scotland.py @@ -78,7 +78,7 @@ def import_area(self, area, area_type): if isinstance(polygon, Polygon): polygon = MultiPolygon([polygon]) - geom["geometry"] = polygon.json + geom["geometry"] = json.loads(polygon.json) a.geometry = json.dumps(geom) a.polygon = polygon a.point = a.polygon.centroid
diff --git a/hub/management/commands/import_postcode_areas.py b/hub/management/commands/import_postcode_areas.py
new file mode 100644
index 000000000..0826c6d7f
--- /dev/null
+++ b/hub/management/commands/import_postcode_areas.py
@@ -0,0 +1,233 @@
+import json
+import logging
+from pathlib import Path
+
+from django.conf import settings
+from django.contrib.gis.db.models import Union as GisUnion
+from django.contrib.gis.geos import GEOSGeometry, MultiPolygon, Polygon
+from django.core.management.base import BaseCommand, CommandError
+from django.db.models import F, Q
+from django.db.models.expressions import Expression, RawSQL
+from django.db.models.functions import Length, Substr
+
+from tqdm import tqdm
+
+from hub.models import Area, AreaType
+
+logger = logging.getLogger(__name__)
+
+# Values accepted by --only, listed in the order the levels are imported
+AREA_LEVELS = ("PC", "PCS", "PCD", "PCA")
+
+
+class Command(BaseCommand):
+    """
+    Import postcode units from GeoJSONL files, then build postcode sectors,
+    districts and areas by unioning the unit polygons that share a prefix.
+    """
+
+    help = "Import Postcodes from GeoJSON"
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "-o",
+            "--only",
+            # Reject typos up front instead of silently importing nothing
+            choices=AREA_LEVELS,
+            help="""
+            Pass one of {PC, PCS, PCD, PCA} to only import that level of area.
+            Note that postcode units (PC) must be imported first.
+            """,
+        )
+
+    def handle(self, only=None, *args, **options):
+        """Run every import level in order, or only the one given by --only."""
+        if not only or only == "PC":
+            # E.G. N14 7LU
+            self.import_postcode_units()
+        if not only or only == "PCS":
+            # E.G. N14 7
+            self.import_postcode_sectors()
+        if not only or only == "PCD":
+            # E.G. N14
+            self.import_postcode_districts()
+        if not only or only == "PCA":
+            # E.G. N
+            self.import_postcode_areas()
+
+    def import_postcode_units(self):
+        """
+        Import one PC area per feature in data/postcodes_1..10.geojsonl.
+
+        Raises CommandError before importing anything if a file is missing, so
+        the later grouping steps never run against a partial set of units.
+        """
+        print("Importing postcode units")
+        filepaths: list[Path] = [
+            settings.BASE_DIR / "data" / f"postcodes_{i}.geojsonl" for i in range(1, 11)
+        ]
+        missing = [filepath.name for filepath in filepaths if not filepath.exists()]
+        if missing:
+            raise CommandError(
+                f'Missing {", ".join(missing)}. '
+                'Download from the Mapped MinIO console, "postcodes" bucket.'
+            )
+
+        # The area type is the same for every file, so look it up once
+        area_type, created = AreaType.objects.get_or_create(
+            name="Postcodes",
+            code="PC",
+            area_type="Postcode",
+            description="Postcodes",
+        )
+
+        for filepath in filepaths:
+            print(f"Importing postcode file {filepath.name} of 10")
+            # Stream line by line instead of loading the whole file, plus a
+            # list of all its lines, into memory
+            with filepath.open(encoding="utf-8") as f:
+                for line in tqdm(f):
+                    if line.strip():
+                        self.import_area(json.loads(line), area_type)
+
+    def import_area(self, area, area_type):
+        """
+        Create or update the area for one parsed GeoJSON feature.
+
+        The feature's POSTCODE property is used as both gss and name. Geometry
+        is only parsed and stored if the area has no polygon yet.
+        """
+        gss = area["properties"]["POSTCODE"]
+        name = gss
+        defaults = {"name": name}
+
+        # Match on area_type too, as update_or_create below does
+        geom_already_loaded = Area.objects.filter(
+            gss=gss, area_type=area_type, polygon__isnull=False
+        ).exists()
+        # Only process geometry data if required, to speed things up
+        if not geom_already_loaded:
+            polygon = GEOSGeometry(json.dumps(area["geometry"]))
+            if isinstance(polygon, Polygon):
+                polygon = MultiPolygon([polygon])
+
+            geom = {
+                "type": "Feature",
+                "geometry": json.loads(polygon.json),
+                "properties": {
+                    **area["properties"],
+                    "code": gss,
+                    "name": name,
+                    "type": area_type.code,
+                },
+            }
+            defaults["geometry"] = json.dumps(geom)
+            defaults["polygon"] = polygon
+            defaults["point"] = polygon.centroid
+
+        # Name and geometry are written in a single query per feature
+        Area.objects.update_or_create(gss=gss, area_type=area_type, defaults=defaults)
+
+    def import_postcode_sectors(self):
+        """Group postcode units by everything but their last 2 characters."""
+        print("Importing postcode sectors")
+        area_type, created = AreaType.objects.get_or_create(
+            name="Postcode Sectors",
+            code="PCS",
+            area_type="Postcode Sector",
+            description="Postcode sectors",
+        )
+        self.import_postcode_grouping(
+            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 2)
+        )
+
+    def import_postcode_districts(self):
+        """Group postcode units by everything but their last 4 characters."""
+        print("Importing postcode districts")
+        area_type, created = AreaType.objects.get_or_create(
+            name="Postcode Districts",
+            code="PCD",
+            area_type="Postcode District",
+            description="Postcode districts",
+        )
+        self.import_postcode_grouping(
+            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 4)
+        )
+
+    def import_postcode_areas(self):
+        """Group postcode units by the first run of letters in their code."""
+        print("Importing postcode areas")
+        area_type, created = AreaType.objects.get_or_create(
+            name="Postcode Areas",
+            code="PCA",
+            area_type="Postcode Area",
+            description="Postcode areas",
+        )
+        self.import_postcode_grouping(
+            area_type,
+            prefix_expression=RawSQL("SUBSTRING(gss FROM '[a-zA-Z]+')", tuple()),
+        )
+
+    def import_postcode_grouping(
+        self, area_type: AreaType, prefix_expression: Expression
+    ):
+        """
+        Import postcodes grouped by prefix, calculated by the provided expression, into
+        the provided area type.
+
+        Each group's polygon is the union of its postcode unit polygons. Groups
+        that already have a polygon are skipped.
+        """
+        # Only consider postcodes that contain a space
+        # The others are "vertical streets" which are not relevant
+        prefixes = (
+            Area.objects.filter(Q(gss__contains=" ") & Q(area_type__code="PC"))
+            .annotate(prefix=prefix_expression)
+            .values_list("prefix", flat=True)
+            .distinct()
+        )
+
+        for prefix in tqdm(prefixes):
+            # SUBSTRING(... FROM pattern) is NULL when nothing matches
+            if not prefix:
+                continue
+
+            geom_already_loaded = Area.objects.filter(
+                gss=prefix, area_type=area_type, polygon__isnull=False
+            ).exists()
+            if geom_already_loaded:
+                continue
+
+            postcodes = Area.objects.annotate(prefix=prefix_expression).filter(
+                area_type__code="PC", prefix=prefix
+            )
+            polygon = postcodes.aggregate(union=GisUnion("polygon"))["union"]
+            if polygon is None:
+                # None of the group's units has a polygon, so the aggregate is
+                # NULL and polygon.json below would raise AttributeError
+                logger.warning(f"No polygons to union for {area_type.code} {prefix}")
+                continue
+
+            if isinstance(polygon, Polygon):
+                polygon = MultiPolygon([polygon])
+
+            geom = {
+                "type": "Feature",
+                "geometry": json.loads(polygon.json),
+                "properties": {
+                    "code": prefix,
+                    "name": prefix,
+                    "type": area_type.code,
+                },
+            }
+
+            Area.objects.update_or_create(
+                gss=prefix,
+                area_type=area_type,
+                defaults={
+                    "name": prefix,
+                    "geometry": json.dumps(geom),
+                    "polygon": polygon,
+                    "point": polygon.centroid,
+                },
+            )
diff --git a/local_intelligence_hub/settings.py b/local_intelligence_hub/settings.py index 19fb191af..c4b9b84bc 100644 --- a/local_intelligence_hub/settings.py +++ b/local_intelligence_hub/settings.py @@ -443,34 +443,16 @@ }, }, "loggers": { - "procrastinate": ( - { - "level": "DEBUG", -
"handlers": ["console"], - "class": "logging.StreamHandler", - "formatter": "procrastinate", - } - if ENVIRONMENT != "production" - else { - "handlers": ["truncated"], - "level": "DEBUG", - } - ), - # Silence endless waiting for job log - "procrastinate.worker": ( - { - "level": "DEBUG", - "handlers": ["console"], - "class": "logging.StreamHandler", - "formatter": "procrastinate", - } - if ENVIRONMENT != "production" - else { - "handlers": ["truncated"], - "level": "INFO", - "propagate": False, - } - ), + "procrastinate": { + "handlers": ["truncated"], + "level": "DEBUG", + }, + # Silence endless waiting for job log on prod + "procrastinate.worker.wait_for_job": { + "handlers": ["console"], + "level": "INFO" if ENVIRONMENT == "production" else "DEBUG", + "propagate": False, + }, "django": { "handlers": ["console"], "level": DJANGO_LOG_LEVEL, diff --git a/nextjs/src/__generated__/graphql.ts b/nextjs/src/__generated__/graphql.ts index f0ae5e3d7..0caef6821 100644 --- a/nextjs/src/__generated__/graphql.ts +++ b/nextjs/src/__generated__/graphql.ts @@ -370,7 +370,10 @@ export enum AnalyticalAreaType { OutputArea = 'output_area', ParliamentaryConstituency = 'parliamentary_constituency', ParliamentaryConstituency_2024 = 'parliamentary_constituency_2024', - Postcode = 'postcode' + Postcode = 'postcode', + PostcodeArea = 'postcode_area', + PostcodeDistrict = 'postcode_district', + PostcodeSector = 'postcode_sector' } export type Analytics = { diff --git a/nextjs/src/app/(logged-in)/data-sources/inspect/[externalDataSourceId]/InspectExternalDataSource.tsx b/nextjs/src/app/(logged-in)/data-sources/inspect/[externalDataSourceId]/InspectExternalDataSource.tsx index 0df221643..501ad4453 100644 --- a/nextjs/src/app/(logged-in)/data-sources/inspect/[externalDataSourceId]/InspectExternalDataSource.tsx +++ b/nextjs/src/app/(logged-in)/data-sources/inspect/[externalDataSourceId]/InspectExternalDataSource.tsx @@ -54,6 +54,7 @@ import { useRouter } from 'next/navigation' import 
pluralize from 'pluralize' import { useEffect, useState } from 'react' import { toast } from 'sonner' +import toSpaceCase from 'to-space-case' import { CREATE_MAP_REPORT } from '../../../reports/ReportList/CreateReportCard' import ExternalDataSourceBadCredentials from './ExternalDataSourceBadCredentials' import { ManageSourceSharing } from './ManageSourceSharing' @@ -304,7 +305,7 @@ export default function InspectExternalDataSource({ {dataType === DataSourceType.Member ? 'Membership list' : dataType - ? pluralize(dataType.toLowerCase()) + ? pluralize(toSpaceCase(dataType.toLowerCase())) : 'Data source'}  •  {crmInfo?.name || crmType} diff --git a/nextjs/src/app/reports/[id]/politicalTilesets.ts b/nextjs/src/app/reports/[id]/politicalTilesets.ts index 85a1565c9..311421f1b 100644 --- a/nextjs/src/app/reports/[id]/politicalTilesets.ts +++ b/nextjs/src/app/reports/[id]/politicalTilesets.ts @@ -11,6 +11,7 @@ export enum BoundaryType { EUROPEAN_ELECTORAL_REGIONS = 'european_electoral_region', LOCAL_AUTHORITIES = 'admin_district', CENSUS_OUTPUT_AREAS = 'census_output_areas', + POSTCODES = 'postcodes', } export function dbAreaTypeToBoundaryType(id: string): BoundaryType | undefined { @@ -28,6 +29,12 @@ export function dbAreaTypeToBoundaryType(id: string): BoundaryType | undefined { return BoundaryType.WARDS case 'EER': return BoundaryType.EUROPEAN_ELECTORAL_REGIONS + // All postcode area types should become postcode boundaries + case 'PC': + case 'PCS': + case 'PCD': + case 'PCA': + return BoundaryType.POSTCODES // All census area types should become census output areas case 'MSOA': case 'LSOA': @@ -167,6 +174,57 @@ const uk: PoliticalTileset[] = [ }, ], }, + { + label: 'Postcodes', + boundaryType: BoundaryType.POSTCODES, + tilesets: [ + { + analyticalAreaType: AnalyticalAreaType.PostcodeArea, + name: 'Postcode Areas', + singular: 'Postcode Area', + mapboxSourceId: 'commonknowledge.postcode_areas', + sourceLayerId: 'postcode_areas', + promoteId: 'code', + labelId: 'name', 
+ minZoom: 0, + maxZoom: 10, + }, + { + analyticalAreaType: AnalyticalAreaType.PostcodeDistrict, + name: 'Postcode Districts', + singular: 'Postcode District', + mapboxSourceId: 'commonknowledge.postcode_districts', + sourceLayerId: 'postcode_districts', + promoteId: 'code', + labelId: 'name', + minZoom: 10, + maxZoom: 12, + }, + { + analyticalAreaType: AnalyticalAreaType.PostcodeSector, + name: 'Postcode Sectors', + singular: 'Postcode Sector', + mapboxSourceId: 'commonknowledge.postcode_sectors', + sourceLayerId: 'postcode_sectors', + promoteId: 'code', + labelId: 'name', + minZoom: 12, + maxZoom: 14, + }, + { + analyticalAreaType: AnalyticalAreaType.Postcode, + name: 'Postcodes', + singular: 'Postcode', + mapboxSourceId: 'commonknowledge.postcodes', + sourceLayerId: 'postcodes', + promoteId: 'POSTCODE', + labelId: 'POSTCODE', + minZoom: 14, + maxZoom: MAX_VALID_ZOOM, + useBoundsInDataQuery: true, + }, + ], + }, ] export const POLITICAL_BOUNDARIES = uk diff --git a/utils/geo_reference.py b/utils/geo_reference.py index 70cf4b991..eee6748fc 100644 --- a/utils/geo_reference.py +++ b/utils/geo_reference.py @@ -19,6 +19,9 @@ class AnalyticalAreaType(Enum): admin_county = "admin_county" admin_ward = "admin_ward" postcode = "postcode" + postcode_area = "postcode_area" + postcode_district = "postcode_district" + postcode_sector = "postcode_sector" european_electoral_region = "european_electoral_region" country = "country" msoa = "msoa" @@ -36,6 +39,10 @@ class AnalyticalAreaType(Enum): "EER": AnalyticalAreaType.european_electoral_region, "CTRY": AnalyticalAreaType.country, "OA21": AnalyticalAreaType.output_area, + "PC": AnalyticalAreaType.postcode, + "PCA": AnalyticalAreaType.postcode_area, + "PCD": AnalyticalAreaType.postcode_district, + "PCS": AnalyticalAreaType.postcode_sector, } diff --git a/utils/postcodesIO.py b/utils/postcodesIO.py index 74abe01c0..b066284d8 100644 --- a/utils/postcodesIO.py +++ b/utils/postcodesIO.py @@ -88,6 +88,7 @@ async def 
enrich_postcodes_io_result( 1. Add legacy `parliamentary_constituency_2024` key 2. Add EER code (not in postcodes.io) 3. Add output_area (not in postcodes.io) + 4. Add postcode_area (not in postcodes.io) """ if not result: return None @@ -115,11 +116,13 @@ async def enrich_postcodes_io_result( ("OA21", "output_area"), ("MSOA", "msoa"), ("LSOA", "lsoa"), + ("PCA", "postcode_area"), + ("PCD", "postcode_district"), + ("PCS", "postcode_sector"), ]: output_area = await Area.objects.filter( area_type__code=area_code, polygon__contains=point ).afirst() - if output_area: result[result_key] = output_area.name result["codes"][result_key] = output_area.gss