Skip to content

Commit

Permalink
updates and type checking
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Feb 23, 2023
1 parent a6236a6 commit dc30707
Show file tree
Hide file tree
Showing 18 changed files with 695 additions and 274 deletions.
61 changes: 52 additions & 9 deletions findthatpostcode/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
import logging
from typing import List

from botocore.client import BaseClient
from elasticsearch import Elasticsearch
from fastapi import APIRouter, Depends, Form, HTTPException, Query, status
from mypy_boto3_s3.client import S3Client
from pydantic_geojson import FeatureModel
from sqlalchemy.orm import Session

from findthatpostcode import crud
from findthatpostcode.db import get_db
from findthatpostcode.schemas import Area, HTTPNotFoundError, NearestPoint, Postcode
from findthatpostcode.db import get_db, get_s3_client
from findthatpostcode.schemas import (
Area,
HTTPNotFoundError,
NearestPoint,
Postcode,
PostcodeHashResults,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -39,12 +48,23 @@ async def read_postcode(postcode: str, db: Elasticsearch = Depends(get_db)):
return postcode_item


@router.get("/hash/{hash}", tags=["Postcode hash"], include_in_schema=False)
@router.get("/hash/{hash}.json", tags=["Postcode hash"], include_in_schema=False)
@router.get(
"/hash/{hash}",
tags=["Postcode hash"],
include_in_schema=False,
response_model=PostcodeHashResults,
)
@router.get(
"/hash/{hash}.json",
tags=["Postcode hash"],
include_in_schema=False,
response_model=PostcodeHashResults,
)
async def single_hash(
hash: str, fields: list[str] = Query([]), db: Session = Depends(get_db)
hash: str, fields: list[str] = Query([]), db: Elasticsearch = Depends(get_db)
):
postcode_items = crud.get_postcode_by_hash(db, [hash], fields=fields)
print(postcode_items)
return {"data": list(postcode_items)}


Expand All @@ -53,11 +73,12 @@ async def single_hash(
tags=["Postcode hash"],
include_in_schema=False,
name="multiple_hash",
response_model=PostcodeHashResults,
)
async def multiple_hash(
hash: list[str] = Form([]),
properties: list[str] = Form([]),
db: Session = Depends(get_db),
db: Elasticsearch = Depends(get_db),
):
postcode_items = crud.get_postcode_by_hash(db, hash, fields=properties)
return {"data": list(postcode_items)}
Expand All @@ -69,7 +90,9 @@ async def multiple_hash(
tags=["Get a point"],
description="Get nearest postcode to a Lat, Long",
)
async def find_nearest_point(lat: float, long: float, db: Session = Depends(get_db)):
async def find_nearest_point(
lat: float, long: float, db: Elasticsearch = Depends(get_db)
):
postcode_item = crud.get_nearest_postcode(db, lat, long)
if not postcode_item:
raise HTTPException(
Expand All @@ -85,7 +108,7 @@ async def find_nearest_point(lat: float, long: float, db: Session = Depends(get_
tags=["Areas"],
description="Get data about an area",
)
async def get_area(areacode: str, db: Session = Depends(get_db)):
async def get_area(areacode: str, db: Elasticsearch = Depends(get_db)):
area = crud.get_area(db, areacode)
if not area:
raise HTTPException(
Expand All @@ -95,13 +118,33 @@ async def get_area(areacode: str, db: Session = Depends(get_db)):
return area


@router.get(
    "/areas/{areacode}.geojson",
    response_model=FeatureModel,
    tags=["Areas"],
    description="Get an area's boundary as a geojson file",
)
async def get_area_boundary(
    areacode: str,
    db: Elasticsearch = Depends(get_db),
    client: S3Client = Depends(get_s3_client),
):
    """Return the GeoJSON boundary feature for a single area.

    Looks the boundary up via ``crud.get_area_boundary``, which takes both the
    Elasticsearch handle and the S3 client — presumably the boundary geometry
    itself is stored as a JSON object in S3, keyed by area code (TODO confirm
    against crud implementation).

    Raises:
        HTTPException: 404 when no boundary is found for ``areacode``.
    """
    area = crud.get_area_boundary(db, client, areacode)
    # Falsy result (None/empty) means the area code is unknown or has no
    # stored boundary — surface that as a 404 rather than an empty body.
    if not area:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No area found for {}".format(areacode),
        )
    return area


@router.get(
"/areas/search.json",
response_model=Area,
tags=["Areas"],
description="Search areas",
)
async def search_areas(areacode: str, db: Session = Depends(get_db)):
async def search_areas(areacode: str, db: Elasticsearch = Depends(get_db)):
area = crud.get_area(db, areacode)
if not area:
raise HTTPException(
Expand Down
124 changes: 74 additions & 50 deletions findthatpostcode/commands/boundaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import io
import json
import os
from typing import List, Optional

import click
import requests
import requests_cache
import tqdm
from pydantic_geojson import FeatureCollectionModel, FeatureModel

from findthatpostcode import db, settings
from findthatpostcode.documents import Area
Expand All @@ -24,7 +26,11 @@
@click.option("--remove/--no-remove", default=False)
@click.argument("urls", nargs=-1)
def import_boundaries(
urls, examine=False, code_field=None, es_index=AREA_INDEX, remove=False
urls: List[str],
examine: bool = False,
code_field: Optional[str] = None,
es_index: str = AREA_INDEX,
remove: bool = False,
):

es = db.get_db()
Expand Down Expand Up @@ -56,30 +62,49 @@ def import_boundaries(


def import_boundary(client, url, examine=False, code_field=None):
boundary_data = {}
if url.startswith("http"):
r = requests.get(url, stream=True)
boundaries = r.json()
boundary_data = r.json()
elif os.path.isfile(url):
with open(url, encoding="latin1") as f:
boundaries = json.load(f)
with open(url, mode="r", encoding="latin1") as f:
boundary_data = json.load(f)
boundaries = FeatureCollectionModel.parse_obj(boundary_data)
errors = []

# find the code field for a boundary
if len(boundaries.get("features", [])) == 0:
if len(boundaries.features) == 0:
errors.append("[ERROR][%s] Features not found in file" % (url,))
if len(boundaries.get("features", [])) > 0 and not code_field:
test_boundary = boundaries.get("features", [])[0]
code_fields = []
for k in test_boundary.get("properties", {}):
if k.lower().endswith("cd"):
code_fields.append(k)
if len(code_fields) == 1:
code_field = code_fields[0]
elif len(code_fields) == 0:
errors.append("[ERROR][%s] No code field found in file" % (url,))
if len(boundaries.features) > 0 and not code_field:
test_boundary = None
for k in boundaries.features:
if isinstance(k, FeatureModel):
test_boundary = k
break
if not test_boundary:
errors.append("[ERROR][%s] No valid features found in file" % (url,))
else:
errors.append("[ERROR][%s] Too many code fields found in file" % (url,))
errors.append("[ERROR][%s] Code fields: %s" % (url, "; ".join(code_fields)))
code_fields = []
properties = getattr(test_boundary, "properties", None)
if properties:
for k in properties:
if k.lower().endswith("cd"):
code_fields.append(k)
if len(code_fields) == 1:
code_field = code_fields[0]
elif len(code_fields) == 0:
errors.append("[ERROR][%s] No code field found in file" % (url,))
else:
errors.append("[ERROR][%s] Too many code fields found in file" % (url,))
errors.append(
"[ERROR][%s] Code fields: %s" % (url, "; ".join(code_fields))
)

if isinstance(code_field, str):
code = code_field.lower().replace("cd", "")
else:
code = "unknown"
errors.append("[ERROR][%s] No code field found in file" % (url,))

if len(errors) > 0:
if examine:
Expand All @@ -88,51 +113,50 @@ def import_boundary(client, url, examine=False, code_field=None):
else:
raise ValueError("; ".join(errors))

code = code_field.lower().replace("cd", "")

if examine:
print("[%s] Opened file: [%s]" % (code, url))
print("[%s] Looking for code field: [%s]" % (code, code_field))
print("[%s] Geojson type: [%s]" % (code, boundaries["type"]))
print("[%s] Number of features [%s]" % (code, len(boundaries["features"])))
for k, i in enumerate(boundaries["features"][:5]):
print("[%s] Feature %s type %s" % (code, k, i["type"]))
print(
"[%s] Feature %s properties %s"
% (code, k, list(i["properties"].keys()))
)
print("[%s] Feature %s geometry type %s" % (code, k, i["geometry"]["type"]))
print(
"[%s] Feature %s geometry length %s"
% (code, k, len(str(i["geometry"]["coordinates"])))
)
if code_field in i["properties"]:
print("[%s] Geojson type: [%s]" % (code, boundaries.type))
print("[%s] Number of features [%s]" % (code, len(boundaries.features)))
for k, i in enumerate(boundaries.features[:5]):
print("[%s] Feature %s type %s" % (code, k, i.type))
if isinstance(i, FeatureModel):
properties = getattr(i, "properties", {})
print(
"[%s] Feature %s Code %s" % (code, k, i["properties"][code_field])
"[%s] Feature %s properties %s" % (code, k, list(properties.keys()))
)
else:
print("[%s] Feature %s geometry type %s" % (code, k, i.geometry.type))
print(
"[ERROR][%s] Feature %s Code field not found"
% (
code,
k,
)
"[%s] Feature %s geometry length %s"
% (code, k, len(str(i.geometry.coordinates)))
)
if code_field in properties:
print("[%s] Feature %s Code %s" % (code, k, properties[code_field]))
else:
print(
"[ERROR][%s] Feature %s Code field not found"
% (
code,
k,
)
)

else:
print("[%s] Opened file: [%s]" % (code, url))
print("[%s] %s features to import" % (code, len(boundaries["features"])))
print("[%s] %s features to import" % (code, len(boundaries.features)))
boundary_count = 0
errors = []
for k, i in tqdm.tqdm(
enumerate(boundaries["features"]), total=len(boundaries["features"])
enumerate(boundaries.features), total=len(boundaries.features)
):
area_code = i["properties"][code_field]
prefix = area_code[0:3]
client.upload_fileobj(
io.BytesIO(json.dumps(i).encode("utf-8")),
settings.S3_BUCKET,
"%s/%s.json" % (prefix, area_code),
)
boundary_count += 1
if isinstance(i, FeatureModel):
properties = getattr(i, "properties", {})
area_code = properties[code_field]
prefix = area_code[0:3]
client.upload_fileobj(
io.BytesIO(json.dumps(i).encode("utf-8")),
settings.S3_BUCKET,
"%s/%s.json" % (prefix, area_code),
)
boundary_count += 1
print("[%s] %s boundaries imported" % (code, boundary_count))
Loading

0 comments on commit dc30707

Please sign in to comment.