diff --git a/Snakefile b/Snakefile index f3167ca23..cda113b3c 100644 --- a/Snakefile +++ b/Snakefile @@ -1039,33 +1039,76 @@ rule prepare_transport_data_input: if not config["custom_data"]["gas_network"]: + alternative_gas_clustering = {} + + # Include all parameters if alternative_clustering is True + if config["cluster_options"]["alternative_clustering"]: + alternative_gas_clustering.update( + { + "admin_shape": config["build_shape_options"]["admin_shape"], + "countries_list": config["countries"], + "layer_id": config["build_shape_options"]["gadm_layer_id"], + "update": config["build_shape_options"]["update_file"], + "out_logging": config["build_shape_options"]["out_logging"], + "year": config["build_shape_options"]["year"], + "nprocesses": config["build_shape_options"]["nprocesses"], + "contended_flag": config["build_shape_options"]["contended_flag"], + "geo_crs": config["crs"]["geo_crs"], + } + ) + rule prepare_gas_network: params: + **alternative_gas_clustering, gas_config=config["sector"]["gas"], alternative_clustering=config["cluster_options"]["alternative_clustering"], - countries_list=config["countries"], - layer_id=config["build_shape_options"]["gadm_layer_id"], - update=config["build_shape_options"]["update_file"], - out_logging=config["build_shape_options"]["out_logging"], - year=config["build_shape_options"]["year"], - nprocesses=config["build_shape_options"]["nprocesses"], - contended_flag=config["build_shape_options"]["contended_flag"], - geo_crs=config["crs"]["geo_crs"], custom_gas_network=config["custom_data"]["gas_network"], input: regions_onshore="resources/" + RDIR + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", output: + preclustered_gas_network="resources/" + + SECDIR + + "gas_networks/pre_gas_network_elec_s{simpl}_{clusters}.csv", clustered_gas_network="resources/" + SECDIR + "gas_networks/gas_network_elec_s{simpl}_{clusters}.csv", - # TODO: Should be a own snakemake rule - # gas_network_fig_1="resources/gas_networks/existing_gas_pipelines_{simpl}_{clusters}.png", - # gas_network_fig_2="resources/gas_networks/clustered_gas_pipelines_{simpl}_{clusters}.png", + bus_regions_onshore="resources/" + + RDIR + + "gas_networks/gas_regions_onshore_elec_s{simpl}_{clusters}.geojson", + country_borders="resources/" + + RDIR + + "gas_networks/country_borders_elec_s{simpl}_{clusters}.geojson", script: "scripts/prepare_gas_network.py" + rule plot_gas_network: + params: + gas_network_data=config["sector"]["gas"]["network_data"], + input: + preclustered_gas_network="resources/" + + SECDIR + + "gas_networks/pre_gas_network_elec_s{simpl}_{clusters}.csv", + clustered_gas_network="resources/" + + SECDIR + + "gas_networks/gas_network_elec_s{simpl}_{clusters}.csv", + bus_regions_onshore="resources/" + + RDIR + + "gas_networks/gas_regions_onshore_elec_s{simpl}_{clusters}.geojson", + country_borders="resources/" + + RDIR + + "gas_networks/country_borders_elec_s{simpl}_{clusters}.geojson", + output: + existing_gas_pipelines="resources/" + + RDIR + + "gas_networks/existing_gas_pipelines_s{simpl}_{clusters}.png", + clustered_gas_pipelines="resources/" + + RDIR + + "gas_networks/clustered_gas_pipelines_s{simpl}_{clusters}.png", + script: + "scripts/plot_gas_network.py" + rule prepare_sector_network: params: diff --git a/config.default.yaml b/config.default.yaml index 0dc98600c..c2530b30c 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -106,6 +106,7 @@ cluster_options: efficiency: mean build_shape_options: + admin_shape: "geoboundaries" # "geoboundaries" or "GADM" Datasource for geographic administrations. If not selected then "GADM" is chosen gadm_layer_id: 1 # GADM level area used for the gadm_shapes. Codes are country-dependent but roughly: 0: country, 1: region/county-like, 2: municipality-like simplify_gadm: true # When true, shape polygons are simplified else no update_file: false # When true, all the input files are downloaded again and replace the existing files diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 43f3b11b8..001a77b15 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -649,7 +649,7 @@ def two_2_three_digits_country(two_code_country): if two_code_country == "SN-GM": return f"{two_2_three_digits_country('SN')}-{two_2_three_digits_country('GM')}" - three_code_country = coco.convert(two_code_country, to="ISO3") + three_code_country = coco.convert(two_code_country, to="ISO3", not_found=None) return three_code_country @@ -670,7 +670,7 @@ def three_2_two_digits_country(three_code_country): if three_code_country == "SEN-GMB": return f"{three_2_two_digits_country('SN')}-{three_2_two_digits_country('GM')}" - two_code_country = coco.convert(three_code_country, to="ISO2") + two_code_country = coco.convert(three_code_country, to="ISO2", not_found=None) return two_code_country @@ -697,7 +697,7 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word if two_code_country == "SN-GM": return f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" - full_name = coco.convert(two_code_country, to="name_short") + full_name = coco.convert(two_code_country, to="name_short", not_found=None) if nocomma: # separate list by delim @@ -740,7 +740,7 @@ def country_name_2_two_digits(country_name): ): return "SN-GM" - full_name = coco.convert(country_name, to="ISO2") + full_name = coco.convert(country_name, to="ISO2", not_found=None) return full_name @@ -1227,7 +1227,9 @@ def download_GADM(country_code, update=False, out_logging=False): return GADM_inputfile_gpkg, GADM_filename -def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering): +def _get_shape_col_gdf( + path_to_gadm, co, gadm_layer_id, gadm_clustering, admin_shapes=None +): """ Parameters ---------- @@ -1238,7 +1240,7 @@ def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering): When the requested layer_id is greater than the last available layer, then the last layer is selected. When a negative value is requested, then, the last layer is requested """ - from build_shapes import get_GADM_layer + from build_shapes import get_GADM_layer, get_geoboundaries_layer col = "name" if not gadm_clustering: @@ -1256,8 +1258,12 @@ def _get_shape_col_gdf(path_to_gadm, co, gadm_layer_id, gadm_clustering): lambda name: three_2_two_digits_country(name[:3]) + name[3:] ) else: - gdf_shapes = get_GADM_layer(co, gadm_layer_id) - col = "GID_{}".format(gadm_layer_id) + if admin_shapes == "geoboundaries": + gdf_shapes = get_geoboundaries_layer(co, gadm_layer_id) + col = "country" + else: + gdf_shapes = get_GADM_layer(co, gadm_layer_id) + col = "GID_{}".format(gadm_layer_id) gdf_shapes = gdf_shapes[gdf_shapes[col].str.contains(co)] return gdf_shapes, col @@ -1269,6 +1275,7 @@ def locate_bus( path_to_gadm=None, gadm_clustering=False, dropnull=True, + admin_shapes=None, col_out=None, ): """ @@ -1297,7 +1304,7 @@ def locate_bus( df[col_out] = None for co in countries: gdf_shape, col = _get_shape_col_gdf( - path_to_gadm, co, gadm_level, gadm_clustering + path_to_gadm, co, gadm_level, gadm_clustering, admin_shapes=admin_shapes ) sub_df = df.loc[df.country == co, ["x", "y", "country"]] gdf = gpd.GeoDataFrame( diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 9dc1f5c33..a4c731a0f 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -5,6 +5,7 @@ # -*- coding: utf-8 -*- +import json import multiprocessing as mp import os import shutil @@ -1363,6 +1364,124 @@ def crop_country(gadm_shapes, subregion_config): ) +def get_geoboundaries_layer(countries, layer_id, geo_crs): + + # initialization of the geoDataFrame + geodf_list = [] + + for country in countries: + print(f"retrieving {country}") + country_code = two_2_three_digits_country(country) + url_template = ( + f"https://www.geoboundaries.org/api/current/gbOpen/{country_code}/ALL/" + ) + + # Download the GeoJSON file + response = requests.get(url_template) + max_level = min(len(json.loads(response.text)), layer_id) + gdf_base = gpd.GeoDataFrame() + + for i in reversed(range(max_level)): + path_geojson = json.loads(response.content)[i]["gjDownloadURL"] + gdf = gpd.read_file(path_geojson) # .to_crs("EPSG:3857") #distance + gdf = gdf.rename(columns={"shapeName": f"shapeName_{i}"}) + + if gdf_base.empty: + gdf_base = gdf + gdf_base[f"index_{i}"] = gdf_base.index + gdf_base["center"] = gdf_base.centroid + continue + + # TODO: Set a clause for shapes where the center is not in a shape (islands) + # Can use distance for example gdf.distance(center).sort_values().index[0] + # idxmax was used because its faster + gdf_base[f"index_{i}"] = [ + gdf.contains(center).idxmax() for center in gdf_base.center + ] + gdf_base[f"shapeName_{i}"] = [ + gdf.loc[index, f"shapeName_{i}"] for index in gdf_base[f"index_{i}"] + ] + + if i == 1: + gdf_base["shapeISO"] = [ + gdf.loc[index, "shapeISO"] for index in gdf_base[f"index_{i}"] + ] + + psuedo_GADM_ID = [] + for i in gdf_base.index: + base_string = country + "." + str(gdf_base.loc[i, "index_0"]) + for j in range(1, max_level): + base_string += "." + str(gdf_base.loc[i, f"index_{j}"]) + psuedo_GADM_ID.append(base_string + "_1") + + gdf_base["GADM_ID"] = psuedo_GADM_ID + gdf_base["country"] = country + gdf_base = gdf_base[ + [f"shapeName_{i}" for i in range(max_level)] + + ["GADM_ID", "country", "geometry"] + ] + + # append geodataframes + geodf_list.append(gdf_base) + + geodf_geoboundaries = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) + geodf_geoboundaries.set_crs(geo_crs) + + return geodf_geoboundaries + + +def geoboundaries( + worldpop_method, + gdp_method, + countries, + geo_crs, + mem_mb, + layer_id=2, + update=False, + out_logging=False, + year=2020, + nprocesses=None, + simplify_gadm=True, +): + + df_geob = get_geoboundaries_layer(countries, layer_id, geo_crs) + + if worldpop_method != False: + mem_read_limit_per_process = mem_mb / nprocesses + # add the population data to the dataset + add_population_data( + df_geob, + countries, + worldpop_method, + year, + update, + out_logging, + mem_read_limit_per_process, + nprocesses=nprocesses, + ) + + if gdp_method != False: + # add the gdp data to the dataset + add_gdp_data( + df_geob, + year, + update, + out_logging, + name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc", + ) + + df_geob.set_index("GADM_ID", inplace=True) + + if simplify_gadm: + df_geob["geometry"] = df_geob["geometry"].map(_simplify_polys) + df_geob.geometry = df_geob.geometry.apply( + lambda r: make_valid(r) if not r.is_valid else r + ) + df_geob = df_geob[df_geob.geometry.is_valid & ~df_geob.geometry.is_empty] + + return df_geob + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -1409,20 +1528,35 @@ def crop_country(gadm_shapes, subregion_config): ) africa_shape.reset_index().to_file(snakemake.output.africa_shape) - gadm_shapes = gadm( - worldpop_method, - gdp_method, - countries_list, - geo_crs, - contended_flag, - mem_mb, - layer_id, - update, - out_logging, - year, - nprocesses=nprocesses, - simplify_gadm=simplify_gadm, - ) + if snakemake.params.build_shape_options["admin_shape"] == "geoboundaries": + gadm_shapes = geoboundaries( + worldpop_method, + gdp_method, + countries_list, + geo_crs, + mem_mb, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + simplify_gadm=simplify_gadm, + ) + else: + gadm_shapes = gadm( + worldpop_method, + gdp_method, + countries_list, + geo_crs, + contended_flag, + mem_mb, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + simplify_gadm=simplify_gadm, + ) save_to_geojson(gadm_shapes, out.gadm_shapes) diff --git a/scripts/plot_gas_network.py b/scripts/plot_gas_network.py new file mode 100644 index 000000000..3d1cd632f --- /dev/null +++ b/scripts/plot_gas_network.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Prepare gas network. +""" + +import logging + +logger = logging.getLogger(__name__) + +import geopandas as gpd +import matplotlib.colors as colors +import matplotlib.pyplot as plt +import pandas as pd +from _helpers import ( + three_2_two_digits_country, +) +from matplotlib.lines import Line2D +from shapely import wkt +from shapely.geometry import LineString + + +def plot_gas_network(pipelines, country_borders, bus_regions_onshore, gas_network_data): + df = pipelines.copy() + df = gpd.overlay(df, country_borders, how="intersection") + + if gas_network_data == "IGGIELGN": + df = df.rename({"p_nom": "capacity [MW]"}, axis=1) + + fig, ax = plt.subplots(1, 1) + fig.set_size_inches(12, 7) + bus_regions_onshore.to_crs(epsg=3857).plot( + ax=ax, color="white", edgecolor="darkgrey", linewidth=0.5 + ) + df.loc[(df.amount_states_passed > 1)].to_crs(epsg=3857).plot( + ax=ax, + column="capacity [MW]", + linewidth=2.5, + # linewidth=df['capacity [MW]'], + # alpha=0.8, + categorical=False, + cmap="viridis_r", + # legend=True, + # legend_kwds={'label':'Pipeline capacity [MW]'}, + ) + + df.loc[(df.amount_states_passed <= 1)].to_crs(epsg=3857).plot( + ax=ax, + column="capacity [MW]", + linewidth=2.5, + # linewidth=df['capacity [MW]'], + alpha=0.5, + categorical=False, + # color='darkgrey', + ls="dotted", + ) + + # # Create custom legend handles for line types + # line_types = [ 'solid', 'dashed', 'dotted'] # solid + # legend_handles = [Line2D([0], [0], color='black', linestyle=line_type) for line_type in line_types] + + # Define line types and labels + line_types = ["solid", "dotted"] + line_labels = ["Operating", "Not considered \n(within-state)"] + + # Create custom legend handles for line types + legend_handles = [ + Line2D([0], [0], color="black", linestyle=line_type, label=line_label) + for line_type, line_label in zip(line_types, line_labels) + ] + + # Add the line type legend + ax.legend( + handles=legend_handles, + title="Status", + borderpad=1, + title_fontproperties={"weight": "bold"}, + fontsize=11, + loc=1, + ) + + # # create the colorbar + norm = colors.Normalize( + vmin=df["capacity [MW]"].min(), vmax=df["capacity [MW]"].max() + ) + cbar = plt.cm.ScalarMappable(norm=norm, cmap="viridis_r") + # fig.colorbar(cbar, ax=ax).set_label('Capacity [MW]') + + # add colorbar + ax_cbar = fig.colorbar(cbar, ax=ax, location="left", shrink=0.8, pad=0.01) + # add label for the colorbar + ax_cbar.set_label("Natural gas pipeline capacity [MW]", fontsize=15) + + ax.set_axis_off() + fig.savefig(snakemake.output.existing_gas_pipelines, dpi=300, bbox_inches="tight") + + +def plot_clustered_gas_network(pipelines, bus_regions_onshore): + # Create a new GeoDataFrame with centroids + centroids = bus_regions_onshore.copy() + centroids["geometry"] = centroids["geometry"].centroid + centroids["gadm_id"] = centroids["gadm_id"].apply( + lambda id: three_2_two_digits_country(id[:3]) + id[3:] + ) + gdf1 = pd.merge( + pipelines, centroids, left_on=["bus0"], right_on=["gadm_id"], how="left" + ) + gdf1.rename(columns={"geometry": "geometry_bus0"}, inplace=True) + pipe_links = pd.merge( + gdf1, centroids, left_on=["bus1"], right_on=["gadm_id"], how="left" + ) + pipe_links.rename(columns={"geometry": "geometry_bus1"}, inplace=True) + + # Create LineString geometries from the points + pipe_links["geometry"] = pipe_links.apply( + lambda row: LineString([row["geometry_bus0"], row["geometry_bus1"]]), axis=1 + ) + + clustered = gpd.GeoDataFrame(pipe_links, geometry=pipe_links["geometry"]) + + # Optional: Set the coordinate reference system (CRS) if needed + clustered.crs = "EPSG:3857" # For example, WGS84 + + # plot pipelines + fig, ax = plt.subplots(1, 1) + fig.set_size_inches(12, 7) + bus_regions_onshore.to_crs(epsg=3857).plot( + ax=ax, color="white", edgecolor="darkgrey", linewidth=0.5 + ) + clustered.to_crs(epsg=3857).plot( + ax=ax, + column="capacity", + linewidth=1.5, + categorical=False, + cmap="plasma", + ) + + centroids.to_crs(epsg=3857).plot( + ax=ax, + color="red", + markersize=35, + alpha=0.5, + ) + + norm = colors.Normalize( + vmin=pipelines["capacity"].min(), vmax=pipelines["capacity"].max() + ) + cbar = plt.cm.ScalarMappable(norm=norm, cmap="plasma") + + # add colorbar + ax_cbar = fig.colorbar(cbar, ax=ax, location="left", shrink=0.8, pad=0.01) + # add label for the colorbar + ax_cbar.set_label("Natural gas pipeline capacity [MW]", fontsize=15) + + ax.set_axis_off() + fig.savefig(snakemake.output.clustered_gas_pipelines, dpi=300, bbox_inches="tight") + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake( + "prepare_gas_network", + simpl="", + clusters="4", + ) + # configure_logging(snakemake) + + gas_network_data = snakemake.params.gas_network_data + + bus_regions_onshore = gpd.read_file(snakemake.input.bus_regions_onshore) + country_borders = gpd.read_file(snakemake.input.country_borders) + + pipelines = pd.read_csv(snakemake.input.preclustered_gas_network) + pipelines["geometry"] = pipelines["geometry"].apply(wkt.loads) + pipelines = gpd.GeoDataFrame(pipelines, geometry="geometry", crs="epsg:3857") + + clustered_pipelines = pd.read_csv(snakemake.input.clustered_gas_network) + + plot_gas_network(pipelines, country_borders, bus_regions_onshore, gas_network_data) + plot_clustered_gas_network(clustered_pipelines, bus_regions_onshore) diff --git a/scripts/prepare_gas_network.py b/scripts/prepare_gas_network.py index e840795da..eb04d11fd 100644 --- a/scripts/prepare_gas_network.py +++ b/scripts/prepare_gas_network.py @@ -14,42 +14,20 @@ import zipfile from pathlib import Path -import fiona import geopandas as gpd -import matplotlib.colors as colors -import matplotlib.pyplot as plt import pandas as pd from _helpers import ( BASE_DIR, content_retrieve, progress_retrieve, three_2_two_digits_country, - two_2_three_digits_country, ) +from build_shapes import * from build_shapes import gadm -from matplotlib.lines import Line2D from pyproj import CRS from pypsa.geo import haversine_pts from shapely.geometry import LineString, Point from shapely.ops import unary_union -from shapely.validation import make_valid - -if __name__ == "__main__": - if "snakemake" not in globals(): - from _helpers import mock_snakemake - - snakemake = mock_snakemake( - "prepare_gas_network", - simpl="", - clusters="4", - ) - - # configure_logging(snakemake) - - # run = snakemake.config.get("run", {}) - # RDIR = run["name"] + "/" if run.get("name") else "" - # store_path_data = Path.joinpath(Path().cwd(), "data") - # country_list = country_list_to_geofk(snakemake.config["countries"])' def download_IGGIELGN_gas_network(): @@ -162,14 +140,14 @@ def correct_Diameter_col(value): return float(value) -def prepare_GGIT_data(GGIT_gas_pipeline): +def prepare_GGIT_data(GGIT_gas_pipeline, GGIT_status): df = GGIT_gas_pipeline.copy().reset_index() # Drop rows containing "--" in the 'WKTFormat' column df = df[df["WKTFormat"] != "--"] # Keep pipelines that are as below - df = df[df["Status"].isin(snakemake.params.gas_config["network_data_GGIT_status"])] + df = df[df["Status"].isin(GGIT_status)] # Convert the WKT column to a GeoDataFrame df = gpd.GeoDataFrame(df, geometry=gpd.GeoSeries.from_wkt(df["WKTFormat"])) @@ -303,226 +281,7 @@ def prepare_IGGIELGN_data( return df -def get_GADM_filename(country_code): - """ - Function to get the GADM filename given the country code. - """ - special_codes_GADM = { - "XK": "XKO", # kosovo - "CP": "XCL", # clipperton island - "SX": "MAF", # sint maartin - "TF": "ATF", # french southern territories - "AX": "ALA", # aland - "IO": "IOT", # british indian ocean territory - "CC": "CCK", # cocos island - "NF": "NFK", # norfolk - "PN": "PCN", # pitcairn islands - "JE": "JEY", # jersey - "XS": "XSP", # spratly - "GG": "GGY", # guernsey - "UM": "UMI", # united states minor outlying islands - "SJ": "SJM", # svalbard - "CX": "CXR", # Christmas island - } - - if country_code in special_codes_GADM: - return f"gadm41_{special_codes_GADM[country_code]}" - else: - return f"gadm41_{two_2_three_digits_country(country_code)}" - - -def download_GADM(country_code, update=False, out_logging=False): - """ - Download gpkg file from GADM for a given country code. - - Parameters - ---------- - country_code : str - Two letter country codes of the downloaded files - update : bool - Update = true, forces re-download of files - - Returns - ------- - gpkg file per country - """ - - GADM_filename = get_GADM_filename(country_code) - - GADM_inputfile_gpkg = os.path.join( - BASE_DIR, - "data", - "gadm", - GADM_filename, - GADM_filename + ".gpkg", - ) # Input filepath gpkg - - return GADM_inputfile_gpkg, GADM_filename - - -def filter_gadm( - geodf, - layer, - cc, - contended_flag, - output_nonstd_to_csv=False, -): - # identify non standard geodf rows - geodf_non_std = geodf[geodf["GID_0"] != two_2_three_digits_country(cc)].copy() - - if not geodf_non_std.empty: - logger.info( - f"Contended areas have been found for gadm layer {layer}. They will be treated according to {contended_flag} option" - ) - - # NOTE: in these options GID_0 is not changed because it is modified below - if contended_flag == "drop": - geodf.drop(geodf_non_std.index, inplace=True) - elif contended_flag != "set_by_country": - # "set_by_country" option is the default; if this elif applies, the desired option falls back to the default - logger.warning( - f"Value '{contended_flag}' for option contented_flag is not recognized.\n" - + "Fallback to 'set_by_country'" - ) - - # force GID_0 to be the country code for the relevant countries - geodf["GID_0"] = cc - - # country shape should have a single geometry - if (layer == 0) and (geodf.shape[0] > 1): - logger.warning( - f"Country shape is composed by multiple shapes that are being merged in agreement to contented_flag option '{contended_flag}'" - ) - # take the first row only to re-define geometry keeping other columns - geodf = geodf.iloc[[0]].set_geometry([geodf.unary_union]) - - # debug output to file - if output_nonstd_to_csv and not geodf_non_std.empty: - geodf_non_std.to_csv( - f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False - ) - - return geodf - - -def get_GADM_layer( - country_list, - layer_id, - geo_crs, - contended_flag, - update=False, - outlogging=False, -): - """ - Function to retrieve a specific layer id of a geopackage for a selection of - countries. - - Parameters - ---------- - country_list : str - List of the countries - layer_id : int - Layer to consider in the format GID_{layer_id}. - When the requested layer_id is greater than the last available layer, then the last layer is selected. - When a negative value is requested, then, the last layer is requested - """ - # initialization of the geoDataFrame - geodf_list = [] - - for country_code in country_list: - # Set the current layer id (cur_layer_id) to global layer_id - cur_layer_id = layer_id - - # download file gpkg - file_gpkg, name_file = download_GADM(country_code, update, outlogging) - - # get layers of a geopackage - list_layers = fiona.listlayers(file_gpkg) - - # get layer name - if (cur_layer_id < 0) or (cur_layer_id >= len(list_layers)): - # when layer id is negative or larger than the number of layers, select the last layer - cur_layer_id = len(list_layers) - 1 - - # read gpkg file - geodf_temp = gpd.read_file( - file_gpkg, layer="ADM_ADM_" + str(cur_layer_id) - ).to_crs(geo_crs) - - geodf_temp = filter_gadm( - geodf=geodf_temp, - layer=cur_layer_id, - cc=country_code, - contended_flag=contended_flag, - output_nonstd_to_csv=False, - ) - - if layer_id == 0: - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{cur_layer_id}"].apply( - lambda x: two_2_three_digits_country(x[:2]) - ) + pd.Series(range(1, geodf_temp.shape[0] + 1)).astype(str) - else: - # create a subindex column that is useful - # in the GADM processing of sub-national zones - # Fix issues with missing "." in selected cases - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{cur_layer_id}"].apply( - lambda x: x if x[3] == "." else x[:3] + "." + x[3:] - ) - - # append geodataframes - geodf_list.append(geodf_temp) - - geodf_GADM = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) - geodf_GADM.set_crs(geo_crs) - - return geodf_GADM - - -def gadm( - countries, - geo_crs, - contended_flag, - layer_id=2, - update=False, - out_logging=False, - year=2020, - nprocesses=None, -): - if out_logging: - logger.info("Stage 4/4: Creation GADM GeoDataFrame") - - # download data if needed and get the desired layer_id - df_gadm = get_GADM_layer(countries, layer_id, geo_crs, contended_flag, update) - - # select and rename columns - df_gadm.rename(columns={"GID_0": "country"}, inplace=True) - - # drop useless columns - df_gadm.drop( - df_gadm.columns.difference(["country", "GADM_ID", "geometry"]), - axis=1, - inplace=True, - errors="ignore", - ) - - # renaming 3 letter to 2 letter ISO code before saving GADM file - # solves issue: https://github.com/pypsa-meets-earth/pypsa-earth/issues/671 - # df_gadm["GADM_ID"] = ( - # df_gadm["GADM_ID"] - # .str.split(".") - # .apply(lambda id: three_2_two_digits_country(id[0]) + "." + ".".join(id[1:])) - # ) - # df_gadm.set_index("GADM_ID", inplace=True) - # df_gadm["geometry"] = df_gadm["geometry"].map(_simplify_polys) - df_gadm.geometry = df_gadm.geometry.apply( - lambda r: make_valid(r) if not r.is_valid else r - ) - df_gadm = df_gadm[df_gadm.geometry.is_valid & ~df_gadm.geometry.is_empty] - - return df_gadm - - -def load_bus_region(onshore_path, pipelines): +def load_bus_region(onshore_path, pipelines, config): """ Load pypsa-earth-sec onshore regions. @@ -536,28 +295,47 @@ def load_bus_region(onshore_path, pipelines): :, ["gadm_id", "geometry"] ] - if snakemake.params.alternative_clustering: - countries_list = snakemake.params.countries_list - layer_id = snakemake.params.layer_id - update = snakemake.params.update - out_logging = snakemake.params.out_logging - year = snakemake.params.year - nprocesses = snakemake.params.nprocesses - contended_flag = snakemake.params.contended_flag - geo_crs = snakemake.params.geo_crs - - bus_regions_onshore = gadm( - countries_list, - geo_crs, - contended_flag, - layer_id, - update, - out_logging, - year, - nprocesses=nprocesses, - ) + if config["alternative_clustering"]: + countries_list = config["countries_list"] + layer_id = config["layer_id"] + update = config["update"] + out_logging = config["out_logging"] + year = config["year"] + nprocesses = config["nprocesses"] + contended_flag = config["contended_flag"] + geo_crs = config["geo_crs"] + + if config["admin_shape"] == "geoboundaries": + bus_regions_onshore = geoboundaries( + False, + False, + countries_list, + geo_crs, + False, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + simplify_gadm=False, + ) + else: + bus_regions_onshore = gadm( + False, + False, + countries_list, + geo_crs, + contended_flag, + False, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + simplify_gadm=False, + ) - # bus_regions_onshore = bus_regions_onshore.reset_index() + bus_regions_onshore = bus_regions_onshore.reset_index() bus_regions_onshore = bus_regions_onshore.rename(columns={"GADM_ID": "gadm_id"}) # Conversion of GADM id to from 3 to 2-digit # bus_regions_onshore["gadm_id"] = bus_regions_onshore["gadm_id"].apply( @@ -749,200 +527,89 @@ def check_existence(row): return merged_df -# TODO: Move it to a separate plotting rule! -# def plot_gas_network(pipelines, country_borders, bus_regions_onshore): -# df = pipelines.copy() -# df = gpd.overlay(df, country_borders, how="intersection") - -# if snakemake.params.gas_config["network_data"] == "IGGIELGN": -# df = df.rename({"p_nom": "capacity [MW]"}, axis=1) - -# fig, ax = plt.subplots(1, 1) -# fig.set_size_inches(12, 7) -# bus_regions_onshore.to_crs(epsg=3857).plot( -# ax=ax, color="white", edgecolor="darkgrey", linewidth=0.5 -# ) -# df.loc[(df.amount_states_passed > 1)].to_crs(epsg=3857).plot( -# ax=ax, -# column="capacity [MW]", -# linewidth=2.5, -# # linewidth=df['capacity [MW]'], -# # alpha=0.8, -# categorical=False, -# cmap="viridis_r", -# # legend=True, -# # legend_kwds={'label':'Pipeline capacity [MW]'}, -# ) - -# df.loc[(df.amount_states_passed <= 1)].to_crs(epsg=3857).plot( -# ax=ax, -# column="capacity [MW]", -# linewidth=2.5, -# # linewidth=df['capacity [MW]'], -# alpha=0.5, -# categorical=False, -# # color='darkgrey', -# ls="dotted", -# ) - -# # # Create custom legend handles for line types -# # line_types = [ 'solid', 'dashed', 'dotted'] # solid -# # legend_handles = [Line2D([0], [0], color='black', linestyle=line_type) for line_type in line_types] - -# # Define line types and labels -# line_types = ["solid", "dotted"] -# line_labels = ["Operating", "Not considered \n(within-state)"] - -# # Create custom legend handles for line types -# legend_handles = [ -# Line2D([0], [0], color="black", linestyle=line_type, label=line_label) -# for line_type, line_label in zip(line_types, line_labels) -# ] - -# # Add the line type legend -# ax.legend( -# handles=legend_handles, -# title="Status", -# borderpad=1, -# title_fontproperties={"weight": "bold"}, -# fontsize=11, -# loc=1, -# ) - -# # # create the colorbar -# norm = colors.Normalize( -# vmin=df["capacity [MW]"].min(), vmax=df["capacity [MW]"].max() -# ) -# cbar = plt.cm.ScalarMappable(norm=norm, cmap="viridis_r") -# # fig.colorbar(cbar, ax=ax).set_label('Capacity [MW]') - -# # add colorbar -# ax_cbar = fig.colorbar(cbar, ax=ax, location="left", shrink=0.8, pad=0.01) -# # add label for the colorbar -# ax_cbar.set_label("Natural gas pipeline capacity [MW]", fontsize=15) - -# ax.set_axis_off() -# fig.savefig(snakemake.output.gas_network_fig_1, dpi=300, bbox_inches="tight") - -# TODO: Move it to a separate plotting rule! -# def plot_clustered_gas_network(pipelines, bus_regions_onshore): -# # Create a new GeoDataFrame with centroids -# centroids = bus_regions_onshore.copy() -# centroids["geometry"] = centroids["geometry"].centroid -# centroids["gadm_id"] = centroids["gadm_id"].apply( -# lambda id: three_2_two_digits_country(id[:3]) + id[3:] -# ) -# gdf1 = pd.merge( -# pipelines, centroids, left_on=["bus0"], right_on=["gadm_id"], how="left" -# ) -# gdf1.rename(columns={"geometry": "geometry_bus0"}, inplace=True) -# pipe_links = pd.merge( -# gdf1, centroids, left_on=["bus1"], right_on=["gadm_id"], how="left" -# ) -# pipe_links.rename(columns={"geometry": "geometry_bus1"}, inplace=True) - -# # Create LineString geometries from the points -# pipe_links["geometry"] = pipe_links.apply( -# lambda row: LineString([row["geometry_bus0"], row["geometry_bus1"]]), axis=1 -# ) - -# clustered = gpd.GeoDataFrame(pipe_links, geometry=pipe_links["geometry"]) - -# # Optional: Set the coordinate reference system (CRS) if needed -# clustered.crs = "EPSG:3857" # For example, WGS84 - -# # plot pipelines -# fig, ax = plt.subplots(1, 1) -# fig.set_size_inches(12, 7) -# bus_regions_onshore.to_crs(epsg=3857).plot( -# ax=ax, color="white", edgecolor="darkgrey", linewidth=0.5 -# ) -# clustered.to_crs(epsg=3857).plot( -# ax=ax, -# column="capacity", -# linewidth=1.5, -# categorical=False, -# cmap="plasma", -# ) - -# centroids.to_crs(epsg=3857).plot( -# ax=ax, -# color="red", -# markersize=35, -# alpha=0.5, -# ) - -# norm = colors.Normalize( -# vmin=pipelines["capacity"].min(), vmax=pipelines["capacity"].max() -# ) -# cbar = plt.cm.ScalarMappable(norm=norm, cmap="plasma") - -# # add colorbar -# ax_cbar = fig.colorbar(cbar, ax=ax, location="left", shrink=0.8, pad=0.01) -# # add label for the colorbar -# ax_cbar.set_label("Natural gas pipeline capacity [MW]", fontsize=15) - -# ax.set_axis_off() -# fig.savefig(snakemake.output.gas_network_fig_2, dpi=300, bbox_inches="tight") - - -if not snakemake.params.custom_gas_network: - if snakemake.params.gas_config["network_data"] == "GGIT": - pipelines = download_GGIT_gas_network() - pipelines = prepare_GGIT_data(pipelines) - - elif snakemake.params.gas_config["network_data"] == "IGGIELGN": - download_IGGIELGN_gas_network() - - gas_network = os.path.join( - BASE_DIR, "data/gas_network/scigrid-gas/data/IGGIELGN_PipeSegments.geojson" +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake( + "prepare_gas_network", + simpl="", + clusters="4", ) + # configure_logging(snakemake) - pipelines = load_IGGIELGN_data(gas_network) - pipelines = prepare_IGGIELGN_data(pipelines) + config = snakemake.params + gas_config = snakemake.params.gas_config - bus_regions_onshore = load_bus_region(snakemake.input.regions_onshore, pipelines)[0] - bus_regions_onshore.geometry = bus_regions_onshore.geometry.buffer(0) - country_borders = load_bus_region(snakemake.input.regions_onshore, pipelines)[1] + if not config["custom_gas_network"]: + if gas_config["network_data"] == "GGIT": + pipelines = download_GGIT_gas_network() + pipelines = prepare_GGIT_data( + pipelines, gas_config["network_data_GGIT_status"] + ) - pipelines = parse_states(pipelines, bus_regions_onshore) + elif gas_config["network_data"] == "IGGIELGN": + download_IGGIELGN_gas_network() - if len(pipelines.loc[pipelines.amount_states_passed >= 2]) > 0: - # TODO: plotting should be a extra rule! - # plot_gas_network(pipelines, country_borders, bus_regions_onshore) + gas_network = os.path.join( + BASE_DIR, + "data/gas_network/scigrid-gas/data/IGGIELGN_PipeSegments.geojson", + ) - pipelines = cluster_gas_network( - pipelines, bus_regions_onshore, length_factor=1.25 - ) + pipelines = load_IGGIELGN_data(gas_network) + pipelines = prepare_IGGIELGN_data(pipelines) - # Conversion of GADM id to from 3 to 2-digit - pipelines["bus0"] = pipelines["bus0"].apply( - lambda id: three_2_two_digits_country(id[:3]) + id[3:] - ) + bus_regions_onshore = load_bus_region( + snakemake.input.regions_onshore, pipelines, config + )[0] + bus_regions_onshore.geometry = bus_regions_onshore.geometry.buffer(0) + country_borders = load_bus_region( + snakemake.input.regions_onshore, pipelines, config + )[1] - pipelines["bus1"] = pipelines["bus1"].apply( - lambda id: three_2_two_digits_country(id[:3]) + id[3:] - ) + bus_regions_onshore.to_file(snakemake.output.bus_regions_onshore) + country_borders.to_file(snakemake.output.country_borders) - pipelines.to_csv(snakemake.output.clustered_gas_network, index=False) + pipelines = parse_states(pipelines, bus_regions_onshore) + pipelines.to_csv(snakemake.output.preclustered_gas_network, index=False) - # TODO: plotting should be a extra rule! - # plot_clustered_gas_network(pipelines, bus_regions_onshore) + if len(pipelines.loc[pipelines.amount_states_passed >= 2]) > 0: - average_length = pipelines["length"].mean() - print("average_length = ", average_length) + pipelines = cluster_gas_network( + pipelines, bus_regions_onshore, length_factor=1.25 + ) - total_system_capacity = pipelines["GWKm"].sum() - print("total_system_capacity = ", total_system_capacity) + # Conversion of GADM id to from 3 to 2-digit + pipelines["bus0"] = pipelines["bus0"].apply( + lambda id: three_2_two_digits_country(id[:3]) + id[3:] + ) - else: - print( - "The following countries have no existing Natural Gas network between the chosen bus regions:\n" - + ", ".join(bus_regions_onshore.country.unique().tolist()) - ) + pipelines["bus1"] = pipelines["bus1"].apply( + lambda id: three_2_two_digits_country(id[:3]) + id[3:] + ) - # Create an empty DataFrame with the specified column names - pipelines = {"bus0": [], "bus1": [], "capacity": [], "length": [], "GWKm": []} + pipelines.to_csv(snakemake.output.clustered_gas_network, index=False) + + average_length = pipelines["length"].mean() + print("average_length = ", average_length) + + total_system_capacity = pipelines["GWKm"].sum() + print("total_system_capacity = ", total_system_capacity) + + else: + print( + "The following countries have no existing Natural Gas network between the chosen bus regions:\n" + + ", ".join(bus_regions_onshore.country.unique().tolist()) + ) - pipelines = pd.DataFrame(pipelines) - pipelines.to_csv(snakemake.output.clustered_gas_network, index=False) + # Create an empty DataFrame with the specified column names + pipelines = { + "bus0": [], + "bus1": [], + "capacity": [], + "length": [], + "GWKm": [], + } + + pipelines = pd.DataFrame(pipelines) + pipelines.to_csv(snakemake.output.clustered_gas_network, index=False)