Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geoboundaries as an alternative to GADM #1370

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 54 additions & 11 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -1039,33 +1039,76 @@ rule prepare_transport_data_input:

if not config["custom_data"]["gas_network"]:

alternative_gas_clustering = {}

# Include all parameters if alternative_clustering is True
if config["cluster_options"]["alternative_clustering"]:
alternative_gas_clustering.update(
{
"admin_shape": config["build_shape_options"]["admin_shape"],
"countries_list": config["countries"],
"layer_id": config["build_shape_options"]["gadm_layer_id"],
"update": config["build_shape_options"]["update_file"],
"out_logging": config["build_shape_options"]["out_logging"],
"year": config["build_shape_options"]["year"],
"nprocesses": config["build_shape_options"]["nprocesses"],
"contended_flag": config["build_shape_options"]["contended_flag"],
"geo_crs": config["crs"]["geo_crs"],
}
)

rule prepare_gas_network:
params:
**alternative_gas_clustering,
gas_config=config["sector"]["gas"],
alternative_clustering=config["cluster_options"]["alternative_clustering"],
countries_list=config["countries"],
layer_id=config["build_shape_options"]["gadm_layer_id"],
update=config["build_shape_options"]["update_file"],
out_logging=config["build_shape_options"]["out_logging"],
year=config["build_shape_options"]["year"],
nprocesses=config["build_shape_options"]["nprocesses"],
contended_flag=config["build_shape_options"]["contended_flag"],
geo_crs=config["crs"]["geo_crs"],
custom_gas_network=config["custom_data"]["gas_network"],
input:
regions_onshore="resources/"
+ RDIR
+ "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson",
output:
preclustered_gas_network="resources/"
+ SECDIR
+ "gas_networks/pre_gas_network_elec_s{simpl}_{clusters}.csv",
clustered_gas_network="resources/"
+ SECDIR
+ "gas_networks/gas_network_elec_s{simpl}_{clusters}.csv",
# TODO: Should be a own snakemake rule
# gas_network_fig_1="resources/gas_networks/existing_gas_pipelines_{simpl}_{clusters}.png",
# gas_network_fig_2="resources/gas_networks/clustered_gas_pipelines_{simpl}_{clusters}.png",
bus_regions_onshore="resources/"
+ RDIR
+ "gas_networks/gas_regions_onshore_elec_s{simpl}_{clusters}.geojson",
country_borders="resources/"
+ RDIR
+ "gas_networks/country_borders_elec_s{simpl}_{clusters}.geojson",
script:
"scripts/prepare_gas_network.py"

rule plot_gas_network:
params:
gas_network_data=config["sector"]["gas"]["network_data"],
input:
preclustered_gas_network="resources/"
+ SECDIR
+ "gas_networks/pre_gas_network_elec_s{simpl}_{clusters}.csv",
clustered_gas_network="resources/"
+ SECDIR
+ "gas_networks/gas_network_elec_s{simpl}_{clusters}.csv",
bus_regions_onshore="resources/"
+ RDIR
+ "gas_networks/gas_regions_onshore_elec_s{simpl}_{clusters}.geojson",
country_borders="resources/"
+ RDIR
+ "gas_networks/country_borders_elec_s{simpl}_{clusters}.geojson",
output:
existing_gas_pipelines="resources/"
+ RDIR
+ "gas_networks/existing_gas_pipelines_s{simpl}_{clusters}.png",
clustered_gas_pipelines="resources/"
+ RDIR
+ "gas_networks/clustered_gas_pipelines_s{simpl}_{clusters}.png",
script:
"scripts/plot_gas_network.py"


rule prepare_sector_network:
params:
Expand Down
1 change: 1 addition & 0 deletions config.default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ cluster_options:
efficiency: mean

build_shape_options:
admin_shape: "geoboundaries" # "geoboundaries" or "GADM" Datasource for geographic administrations. If not selected then "GADM" is chosen
gadm_layer_id: 1 # GADM level area used for the gadm_shapes. Codes are country-dependent but roughly: 0: country, 1: region/county-like, 2: municipality-like
simplify_gadm: true # When true, shape polygons are simplified else no
update_file: false # When true, all the input files are downloaded again and replace the existing files
Expand Down
25 changes: 16 additions & 9 deletions scripts/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def two_2_three_digits_country(two_code_country):
if two_code_country == "SN-GM":
return f"{two_2_three_digits_country('SN')}-{two_2_three_digits_country('GM')}"

three_code_country = coco.convert(two_code_country, to="ISO3")
three_code_country = coco.convert(two_code_country, to="ISO3", not_found=None)
return three_code_country


Expand All @@ -670,7 +670,7 @@ def three_2_two_digits_country(three_code_country):
if three_code_country == "SEN-GMB":
return f"{three_2_two_digits_country('SN')}-{three_2_two_digits_country('GM')}"

two_code_country = coco.convert(three_code_country, to="ISO2")
two_code_country = coco.convert(three_code_country, to="ISO2", not_found=None)
return two_code_country


Expand All @@ -697,7 +697,7 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word
if two_code_country == "SN-GM":
return f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}"

full_name = coco.convert(two_code_country, to="name_short")
full_name = coco.convert(two_code_country, to="name_short", not_found=None)

if nocomma:
# separate list by delim
Expand Down Expand Up @@ -740,7 +740,7 @@ def country_name_2_two_digits(country_name):
):
return "SN-GM"

full_name = coco.convert(country_name, to="ISO2")
full_name = coco.convert(country_name, to="ISO2", not_found=None)
return full_name


Expand Down Expand Up @@ -1227,7 +1227,9 @@ def download_GADM(country_code, update=False, out_logging=False):
return GADM_inputfile_gpkg, GADM_filename


def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering):
def _get_shape_col_gdf(
path_to_gadm, co, gadm_layer_id, gadm_clustering, admin_shapes=None
):
"""
Parameters
----------
Expand All @@ -1238,7 +1240,7 @@ def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering):
When the requested layer_id is greater than the last available layer, then the last layer is selected.
When a negative value is requested, then, the last layer is requested
"""
from build_shapes import get_GADM_layer
from build_shapes import get_GADM_layer, get_geoboundaries_layer

col = "name"
if not gadm_clustering:
Expand All @@ -1256,8 +1258,12 @@ def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering):
lambda name: three_2_two_digits_country(name[:3]) + name[3:]
)
else:
gdf_shapes = get_GADM_layer(co, gadm_layer_id)
col = "GID_{}".format(gadm_layer_id)
if admin_shapes == "geoboundaries":
gdf_shapes = get_geoboundaries_layer(co, gadm_layer_id)
col = "country"
else:
gdf_shapes = get_GADM_layer(co, gadm_layer_id)
col = "GID_{}".format(gadm_layer_id)
gdf_shapes = gdf_shapes[gdf_shapes[col].str.contains(co)]
return gdf_shapes, col

Expand All @@ -1269,6 +1275,7 @@ def locate_bus(
path_to_gadm=None,
gadm_clustering=False,
dropnull=True,
admin_shapes=None,
col_out=None,
):
"""
Expand Down Expand Up @@ -1297,7 +1304,7 @@ def locate_bus(
df[col_out] = None
for co in countries:
gdf_shape, col = _get_shape_col_gdf(
path_to_gadm, co, gadm_level, gadm_clustering
path_to_gadm, co, gadm_level, gadm_clustering, admin_shapes=admin_shapes
)
sub_df = df.loc[df.country == co, ["x", "y", "country"]]
gdf = gpd.GeoDataFrame(
Expand Down
162 changes: 148 additions & 14 deletions scripts/build_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

# -*- coding: utf-8 -*-

import json
import multiprocessing as mp
import os
import shutil
Expand Down Expand Up @@ -1363,6 +1364,124 @@ def crop_country(gadm_shapes, subregion_config):
)


def get_geoboundaries_layer(countries, layer_id, geo_crs):

# initialization of the geoDataFrame
geodf_list = []

for country in countries:
print(f"retrieving {country}")
country_code = two_2_three_digits_country(country)
url_template = (
f"https://www.geoboundaries.org/api/current/gbOpen/{country_code}/ALL/"
)

# Download the GeoJSON file
response = requests.get(url_template)
max_level = min(len(json.loads(response.text)), layer_id)
gdf_base = gpd.GeoDataFrame()

for i in reversed(range(max_level)):
path_geojson = json.loads(response.content)[i]["gjDownloadURL"]
gdf = gpd.read_file(path_geojson) # .to_crs("EPSG:3857") #distance
gdf = gdf.rename(columns={"shapeName": f"shapeName_{i}"})

if gdf_base.empty:
gdf_base = gdf
gdf_base[f"index_{i}"] = gdf_base.index
gdf_base["center"] = gdf_base.centroid
continue

# TODO: Set a clause for shapes where the center is not in a shape (islands)
# Can use distance for example gdf.distance(center).sort_values().index[0]
# idxmax was used because its faster
gdf_base[f"index_{i}"] = [
gdf.contains(center).idxmax() for center in gdf_base.center
]
gdf_base[f"shapeName_{i}"] = [
gdf.loc[index, f"shapeName_{i}"] for index in gdf_base[f"index_{i}"]
]

if i == 1:
gdf_base["shapeISO"] = [
gdf.loc[index, "shapeISO"] for index in gdf_base[f"index_{i}"]
]

psuedo_GADM_ID = []
for i in gdf_base.index:
base_string = country + "." + str(gdf_base.loc[i, "index_0"])
for j in range(1, max_level):
base_string += "." + str(gdf_base.loc[i, f"index_{j}"])
psuedo_GADM_ID.append(base_string + "_1")

gdf_base["GADM_ID"] = psuedo_GADM_ID
gdf_base["country"] = country
gdf_base = gdf_base[
[f"shapeName_{i}" for i in range(max_level)]
+ ["GADM_ID", "country", "geometry"]
]

# append geodataframes
geodf_list.append(gdf_base)

geodf_geoboundaries = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True))
geodf_geoboundaries.set_crs(geo_crs)

return geodf_geoboundaries


def geoboundaries(
worldpop_method,
gdp_method,
countries,
geo_crs,
mem_mb,
layer_id=2,
update=False,
out_logging=False,
year=2020,
nprocesses=None,
simplify_gadm=True,
):

df_geob = get_geoboundaries_layer(countries, layer_id, geo_crs)

if worldpop_method != False:
mem_read_limit_per_process = mem_mb / nprocesses
# add the population data to the dataset
add_population_data(
df_geob,
countries,
worldpop_method,
year,
update,
out_logging,
mem_read_limit_per_process,
nprocesses=nprocesses,
)

if gdp_method != False:
# add the gdp data to the dataset
add_gdp_data(
df_geob,
year,
update,
out_logging,
name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc",
)

df_geob.set_index("GADM_ID", inplace=True)

if simplify_gadm:
df_geob["geometry"] = df_geob["geometry"].map(_simplify_polys)
df_geob.geometry = df_geob.geometry.apply(
lambda r: make_valid(r) if not r.is_valid else r
)
df_geob = df_geob[df_geob.geometry.is_valid & ~df_geob.geometry.is_empty]

return df_geob


if __name__ == "__main__":
if "snakemake" not in globals():
from _helpers import mock_snakemake
Expand Down Expand Up @@ -1409,20 +1528,35 @@ def crop_country(gadm_shapes, subregion_config):
)
africa_shape.reset_index().to_file(snakemake.output.africa_shape)

gadm_shapes = gadm(
worldpop_method,
gdp_method,
countries_list,
geo_crs,
contended_flag,
mem_mb,
layer_id,
update,
out_logging,
year,
nprocesses=nprocesses,
simplify_gadm=simplify_gadm,
)
if snakemake.params.build_shape_options["admin_shape"] == "geoboundaries":
gadm_shapes = geoboundaries(
worldpop_method,
gdp_method,
countries_list,
geo_crs,
mem_mb,
layer_id,
update,
out_logging,
year,
nprocesses=nprocesses,
simplify_gadm=simplify_gadm,
)
else:
gadm_shapes = gadm(
worldpop_method,
gdp_method,
countries_list,
geo_crs,
contended_flag,
mem_mb,
layer_id,
update,
out_logging,
year,
nprocesses=nprocesses,
simplify_gadm=simplify_gadm,
)

save_to_geojson(gadm_shapes, out.gadm_shapes)

Expand Down
Loading
Loading