-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_geojson_data.py
105 lines (87 loc) · 4.07 KB
/
get_geojson_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
Builds the geojson (map files) for Sweden at both the county and municipality level.
Allows me to make the choropleth maps.
Terminology Note:
The code in this script uses the Swedish words "kommun"/"kommuner"
(Municipality/Municipalities in English) for variable naming.
The English words County/Counties (rather than län/län) are used, however.
"""
from typing import Tuple
import json
from urllib.request import urlopen
import pandas as pd
def get_geojson_map(
    relation_numbers: list,
    *,
    url_prefix: str = "http://polygons.openstreetmap.fr/get_geojson.py?id=",
    url_suffix: str = "&params=0",
) -> Tuple[dict, list]:
    """
    Download one boundary per relation and assemble a FeatureCollection.

    Each relation is requested from
    ``url_prefix + str(relation) + url_suffix``.  The response is expected
    to be a JSON object whose "geometries" entry is a non-empty list; the
    first geometry becomes the "geometry" of a feature whose "type" is
    "Feature" and whose "id" is the relation number.  A relation whose
    download, JSON decoding or reshaping fails is recorded in the fail list
    instead of aborting the whole run.

    Parameters
    ----------
    relation_numbers : list
        Unique i.d.s for each county/kommun.  It is only iterated over, so
        any iterable of i.d.s works.
    url_prefix : str, optional
        Text placed before the relation i.d. in the request URL.
    url_suffix : str, optional
        Text placed after the relation i.d. in the request URL.

    Returns
    -------
    complete_map : dict
        ``{"type": "FeatureCollection", "features": [...]}`` holding one
        feature per successfully extracted relation, in input order.
    fail_list : list
        Counties or kommuner that failed to be extracted, in input order.
    """
    features = []
    fail_list = []

    for relation in relation_numbers:
        relation_url = url_prefix + str(relation) + url_suffix
        try:
            with urlopen(relation_url) as response:
                coordset = json.load(response)
            geometry = coordset["geometries"][0]
        except (OSError, ValueError, LookupError, TypeError):
            # OSError     -> URLError/HTTPError and other network failures
            # ValueError  -> body is not valid JSON (JSONDecodeError)
            # LookupError -> "geometries" key missing or list empty
            # TypeError   -> decoded JSON is not a mapping / not indexable
            fail_list.append(relation)
            continue

        # Reformat the response into a feature before appending.
        del coordset["geometries"]
        coordset["type"] = "Feature"
        coordset["geometry"] = geometry
        coordset["id"] = relation
        features.append(coordset)

    print(f"Getting JSON Data failed {len(fail_list)} times.")

    # Now construct the complete geoJson file.
    complete_map = {"type": "FeatureCollection", "features": features}
    return complete_map, fail_list
def main():
    """Webscrape maps, build complete maps and save.

    Reads the kommun and county tables from the OpenStreetMap wiki, fetches
    a map for each relation number via ``get_geojson_map``, removes any
    relation that failed to be extracted from the tables, and writes two
    CSV lists and two JSON map files into the ``assets`` folder.
    """
    # Function-local import so this block does not rely on file-level edits.
    from pathlib import Path

    # Create the output folder up front: a missing folder would otherwise
    # only surface after every download has already finished.
    Path("assets").mkdir(parents=True, exist_ok=True)

    # Grab Kommun and county relation numbers from wiki.
    # No comments on either page so happy to drop them.
    url_kommuner = "https://wiki.openstreetmap.org/wiki/Sweden/Kommuner"
    df_kommuner = (pd.read_html(url_kommuner))[0]  # I want 1st table on page
    df_kommuner = df_kommuner.drop(["KommentarComment"], axis=1)
    df_kommuner = df_kommuner.rename(columns={"KommunMunicipality": "kommun"})
    kommuner_relation_nums = df_kommuner["Relation"]

    url_counties = "https://wiki.openstreetmap.org/wiki/Sweden/L%C3%A4n"
    df_counties = (pd.read_html(url_counties))[1]  # I want 2nd table on page
    drop_list = ["SCB", "NUTS-1", "NUTS-2", "NUTS-3",
                 "FIPS", "ISO-3166-2", "KommentarComment"]
    df_counties = df_counties.drop(drop_list, axis=1)
    df_counties = df_counties.rename(columns={"LänCounty": "county"})

    # Rename län to county for record matching.
    # (Swedish version of county town names often end with "s", so the
    # "s län" form is replaced first and the bare "län" form second.)
    df_counties["county"] = (
        df_counties["county"].str.replace("s län", " county"))
    df_counties["county"] = df_counties["county"].str.replace("län", "county")
    counties_relations_nums = df_counties["Relation"]

    counties_map, counties_fail_list = get_geojson_map(counties_relations_nums)
    kommuner_map, kommuner_fail_list = get_geojson_map(kommuner_relation_nums)

    # Remove any that are in the fail_list (thankfully none).
    df_kommuner = df_kommuner[~df_kommuner.Relation.isin(kommuner_fail_list)]
    df_counties = df_counties[~df_counties.Relation.isin(counties_fail_list)]

    # Write out the dataframes and map files.
    df_kommuner.to_csv("assets/kommuner_list.csv", index=False)
    df_counties.to_csv("assets/counties_list.csv", index=False)
    with open("assets/kommuner_map.json", "w", encoding="utf-8") as outfile:
        json.dump(kommuner_map, outfile)
    with open("assets/counties_map.json", "w", encoding="utf-8") as outfile:
        json.dump(counties_map, outfile)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()