-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpostcode-centroid.py
executable file
·87 lines (70 loc) · 2.84 KB
/
postcode-centroid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
"""
Return GeoJSON centroids for each postcode in Luxembourg
- Downloads the latest geojson from UDATA_ADDRESSES
- Average the position of all postcodes
- Spit out geojson
Run like :
python3 postcode-centroid.py > centroids.geojson
A sample centroids.geojson.xz (from 2016-07-11) is included.
There are 63 postcodes in Luxembourg that contain only one
address. Some of these are for residential addresses. You
might want to consider merging these points with the nearest
neighbour if you need to anonymise the output data of
your project.
"""
import requests
import sys
import geojson
from collections import defaultdict
# The API endpoint that contains the link to the most recent version of the
# addresses in all available formats (geojson, but also shp).
UDATA_ADDRESSES = 'https://data.public.lu/api/1/datasets/adresses-georeferencees-bd-adresses/'
# Eugh, magic numbers.
# This is just the uuid for the addresses in geojson format.
UDATA_ADDRESSES_ID = '7b58cf20-cbb0-4970-83f7-53a277f691b8'
# Initialise
postcodes = defaultdict(list)
# Udata has no permalink. Parse the API to get the latest geojson.
udata_json = requests.get(UDATA_ADDRESSES).json()
# Find the resource with that ID in the udata json
# i.e. our addresses
for resource in udata_json['resources']:
if resource['id'] == UDATA_ADDRESSES_ID:
ADDRESSES_GEOJSON = resource['url']
break
else:
# Oops, the for loop didn't find anything!
raise IOError("Could not find resource id {} in {}".format(
UDATA_ADDRESSES_ID, UDATA_ADDRESSES
))
# Downloading the addresses might take ~15 seconds.
# In the meanwile, shake your wrists and correct your posture.
addresses = requests.get(ADDRESSES_GEOJSON).json()
# For all addresses, append coordinates to the list of coordinates
# of this postcode.
for address in addresses['features']:
code_postal = address['properties']['code_postal']
coordinates = address['geometry']['coordinates'] #each coordinate is a tuple (long, lat)
postcodes[code_postal].append(coordinates)
# For all postcodes, calculate the centroid
def rounded_location_generator(postcodes):
for postcode, points in postcodes.items():
x, y = zip(*points)
try:
count = len(points)
# round to 6 decimals
centroid = (
round(sum(x) / count, 6),
round(sum(y) / count, 6)
)
except ZeroDivisionError:
print("No address for postcode {}".format(postcode), file=sys.stderr)
pass
yield geojson.Feature(
geometry=geojson.Point(centroid),
properties={"postcode": postcode, "count": count}
)
# Dump the json features as a FeatureCollection
postcodes_centroids = list(rounded_location_generator(postcodes))
print(geojson.dumps(geojson.FeatureCollection(postcodes_centroids)))