pypsa-meets-earth · davide-f · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
@@ -1214,118 +1214,83 @@ def download_GADM(country_code, update=False, out_logging=False):
     return GADM_inputfile_gpkg, GADM_filename
 
 
-def get_GADM_layer(country_list, layer_id, update=False, outlogging=False):
-    """
-    Function to retrieve a specific layer id of a geopackage for a selection of
-    countries.
-
-    Parameters
-    ----------
-    country_list : str
-        List of the countries
-    layer_id : int
-        Layer to consider in the format GID_{layer_id}.
-        When the requested layer_id is greater than the last available layer, then the last layer is selected.
-        When a negative value is requested, then, the last layer is requested
-    """
-    # initialization of the list of geodataframes
-    geodf_list = []
-
-    for country_code in country_list:
-        # download file gpkg
-        file_gpkg, name_file = download_GADM(country_code, update, outlogging)
-
-        # get layers of a geopackage
-        list_layers = fiona.listlayers(file_gpkg)
-
-        # get layer name
-        if layer_id < 0 | layer_id >= len(list_layers):
-            # when layer id is negative or larger than the number of layers, select the last layer
-            layer_id = len(list_layers) - 1
-        code_layer = np.mod(layer_id, len(list_layers))
-        layer_name = (
-            f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}"
-        )
-
-        # read gpkg file
-        geodf_temp = gpd.read_file(file_gpkg, layer=layer_name)
-
-        # convert country name representation of the main country (GID_0 column)
-        geodf_temp["GID_0"] = [
-            three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"]
-        ]
-
-        # create a subindex column that is useful
-        # in the GADM processing of sub-national zones
-        geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"]
+def _get_shape_col_gdf(path_to_gadm, co, gadm_level, gadm_clustering):
+    from build_shapes import get_GADM_layer
 
-        # concatenate geodataframes
-        geodf_list = pd.concat([geodf_list, geodf_temp])
-
-    geodf_GADM = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True))
-    geodf_GADM.set_crs(geodf_list[0].crs, inplace=True)
+    col = "name"
+    if not gadm_clustering:
+        gdf_shapes = gpd.read_file(path_to_gadm)
+    else:
+        if path_to_gadm:
+            gdf_shapes = gpd.read_file(path_to_gadm)
+            if "GADM_ID" in gdf_shapes.columns:
+                col = "GADM_ID"
 
-    return geodf_GADM
+                if gdf_shapes[col][0][
+                    :3
+                ].isalpha():  # TODO clean later by changing all codes to 2 letters
+                    gdf_shapes[col] = gdf_shapes[col].apply(
+                        lambda name: three_2_two_digits_country(name[:3]) + name[3:]
+                    )
+        else:
+            gdf_shapes = get_GADM_layer(co, gadm_level)
+            col = "GID_{}".format(gadm_level)
+    gdf_shapes = gdf_shapes[gdf_shapes[col].str.contains(co)]
+    return gdf_shapes, col
 
 
 def locate_bus(
-    coords,
-    co,
+    df,
+    countries,
     gadm_level,
     path_to_gadm=None,
     gadm_clustering=False,
-    col="name",
+    dropnull=True,
+    col_out=None,
 ):
     """
-    Function to locate the right node for a coordinate set input coords of
-    point.
+    Function to locate the points of the dataframe df into the GADM shapefile.
 
     Parameters
     ----------
-    coords: pandas dataseries
-        dataseries with 2 rows x & y representing the longitude and latitude
-    co: string (code for country where coords are MA Morocco)
-        code of the countries where the coordinates are
+    df: pd.Dataframe
+        Dataframe with mandatory x, y and country columns
+    countries: list
+        List of target countries
+    gadm_level: int
+        GADM level to be used
+    path_to_gadm: str (default None)
+        Path to the GADM shapefile
+    gadm_clustering: bool (default False)
+        True if gadm clustering is adopted
+    dropnull: bool (default True)
+        True if the rows with null values should be dropped
+    col_out: str (default gadm_{gadm_level})
+        Name of the output column
     """
-    col = "name"
-    if not gadm_clustering:
-        gdf = gpd.read_file(path_to_gadm)
-    else:
-        if path_to_gadm:
-            gdf = gpd.read_file(path_to_gadm)
-            if "GADM_ID" in gdf.columns:
-                col = "GADM_ID"
+    if col_out is None:
+        col_out = "gadm_{}".format(gadm_level)
+    df = df[df.country.isin(countries)]
+    df[col_out] = None
+    for co in countries:
+        gdf_shape, col = _get_shape_col_gdf(
+            path_to_gadm, co, gadm_level, gadm_clustering
+        )
+        sub_df = df.loc[df.country == co, ["x", "y", "country"]]
+        gdf = gpd.GeoDataFrame(
+            sub_df,
+            geometry=gpd.points_from_xy(sub_df.x, sub_df.y),
+            crs="EPSG:4326",
+        )
 
-                if gdf[col][0][
-                    :3
-                ].isalpha():  # TODO clean later by changing all codes to 2 letters
-                    gdf[col] = gdf[col].apply(
-                        lambda name: three_2_two_digits_country(name[:3]) + name[3:]
-                    )
-        else:
-            gdf = get_GADM_layer(co, gadm_level)
-            col = "GID_{}".format(gadm_level)
+        gdf_merged = gpd.sjoin_nearest(gdf, gdf_shape, how="inner", rsuffix="right")
 
-        # gdf.set_index("GADM_ID", inplace=True)
-    gdf_co = gdf[
-        gdf[col].str.contains(co)
-    ]  # geodataframe of entire continent - output of prev function {} are placeholders
-    # in strings - conditional formatting
-    # insert any variable into that place using .format - extract string and filter for those containing co (MA)
-    point = Point(coords["x"], coords["y"])  # point object
+        df.loc[gdf_merged.index, col_out] = gdf_merged[col]
 
-    if gdf_co.empty:
-        return None
+    if dropnull:
+        df = df[df[col_out].notnull()]
 
-    try:
-        return gdf_co[gdf_co.contains(point)][
-            col
-        ].item()  # filter gdf_co which contains point and returns the bus
-
-    except ValueError:
-        return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][
-            col
-        ].item()  # looks for closest one shape=node
+    return df
 
 
 def get_conv_factors(sector):

diff --git a/scripts/add_export.py b/scripts/add_export.py
@@ -39,25 +39,14 @@ def select_ports(n):
         keep_default_na=False,
     ).squeeze()
 
-    # ports = raw_ports[["name", "country", "fraction", "x", "y"]]
-    # ports.loc[:, "fraction"] = ports.fraction.round(1)
-
-    ports = ports[ports.country.isin(countries)]
-    if len(ports) < 1:
-        logger.error(
-            "No export ports chosen, please add ports to the file data/export_ports.csv"
-        )
     gadm_level = snakemake.params.gadm_level
 
-    ports["gadm_{}".format(gadm_level)] = ports[["x", "y", "country"]].apply(
-        lambda port: locate_bus(
-            port[["x", "y"]],
-            port["country"],
-            gadm_level,
-            snakemake.input["shapes_path"],
-            snakemake.params.alternative_clustering,
-        ),
-        axis=1,
+    ports = locate_bus(
+        ports,
+        countries,
+        gadm_level,
+        snakemake.input.shapes_path,
+        snakemake.params.alternative_clustering,
     )
 
     # TODO: revise if ports quantity and property by shape become relevant
@@ -219,6 +208,7 @@ def create_export_profile():
             discountrate="0.071",
             demand="AB",
             h2export="120",
+            # configfile="test/config.test1.yaml",
         )
 
     overrides = override_component_attrs(snakemake.input.overrides)

diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
@@ -30,19 +30,10 @@ def map_industry_to_buses(df, countries, gadm_level, shapes_path, gadm_clusterin
     Only cement not steel - proof of concept.
     Change hotmaps to more descriptive name, etc.
     """
-    df = df[df.country.isin(countries)]
-    df["gadm_{}".format(gadm_level)] = df[["x", "y", "country"]].apply(
-        lambda site: locate_bus(
-            site[["x", "y"]].astype("float"),
-            site["country"],
-            gadm_level,
-            shapes_path,
-            gadm_clustering,
-        ),
-        axis=1,
+    df = locate_bus(df, countries, gadm_level, shapes_path, gadm_clustering).set_index(
+        "gadm_" + str(gadm_level)
     )
-
-    return df.set_index("gadm_" + str(gadm_level))
+    return df
 
 
 def build_nodal_distribution_key(
@@ -122,7 +113,6 @@ def match_technology(df):
 
         snakemake = mock_snakemake(
             "build_industrial_distribution_key",
-            simpl="",
             clusters="4",
             planning_horizons=2050,
             demand="AB",

diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py
@@ -111,6 +111,7 @@
 from _helpers import (
     configure_logging,
     create_logger,
+    locate_bus,
     read_csv_nafix,
     to_csv_nafix,
     two_digits_2_name_country,
@@ -371,28 +372,13 @@ def replace_natural_gas_technology(df: pd.DataFrame):
         country_list = snakemake.params.countries
         geo_crs = snakemake.params.geo_crs
 
-        gdf = gpd.read_file(snakemake.input.gadm_shapes)
-
-        def locate_bus(coords, co):
-            gdf_co = gdf[gdf["GADM_ID"].str.contains(co)]
-
-            point = Point(coords["lon"], coords["lat"])
-
-            try:
-                return gdf_co[gdf_co.contains(point)][
-                    "GADM_ID"
-                ].item()  # filter gdf_co which contains point and returns the bus
-
-            except ValueError:
-                return gdf_co[
-                    gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))
-                ][
-                    "GADM_ID"
-                ].item()  # looks for closest one shape=node
-                # fixing https://github.com/pypsa-meets-earth/pypsa-earth/pull/670
-
-        ppl["region_id"] = ppl[["lon", "lat", "Country"]].apply(
-            lambda pp: locate_bus(pp[["lon", "lat"]], pp["Country"]), axis=1
-        )
+        ppl = locate_bus(
+            ppl.rename(columns={"lon": "x", "lat": "y", "Country": "country"}),
+            country_list,
+            gadm_layer_id,
+            snakemake.input.gadm_shapes,
+            snakemake.params.alternative_clustering,
+            col_out="region_id",
+        ).rename(columns={"x": "lon", "y": "lat", "country": "Country"})
 
     ppl.to_csv(snakemake.output.powerplants)
diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py
@@ -134,6 +134,7 @@
     configure_logging,
     create_logger,
     get_aggregation_strategies,
+    locate_bus,
     update_p_nom_max,
 )
 from add_electricity import load_costs
@@ -379,23 +380,13 @@ def n_bounds(model, *n_id):
 
 
 def busmap_for_gadm_clusters(inputs, n, gadm_level, geo_crs, country_list):
-    gdf = gpd.read_file(inputs.gadm_shapes)
 
-    def locate_bus(coords, co):
-        gdf_co = gdf[gdf["GADM_ID"].str.contains(co)]
-        point = Point(coords["x"], coords["y"])
-
-        try:
-            return gdf_co[gdf_co.contains(point)]["GADM_ID"].item()
-
-        except ValueError:
-            return gdf_co[
-                gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))
-            ]["GADM_ID"].item()
-
-    buses = n.buses
-    buses["gadm_{}".format(gadm_level)] = buses[["x", "y", "country"]].apply(
-        lambda bus: locate_bus(bus[["x", "y"]], bus["country"]), axis=1
+    buses = locate_bus(
+        n.buses,
+        country_list,
+        gadm_level,
+        inputs.gadm_shapes,
+        gadm_clustering=True,
     )
 
     buses["gadm_subnetwork"] = (

diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
@@ -1021,21 +1021,13 @@ def add_aviation(n, cost):
 
     gadm_level = options["gadm_level"]
 
-    airports["gadm_{}".format(gadm_level)] = airports[["x", "y", "country"]].apply(
-        lambda airport: locate_bus(
-            airport[["x", "y"]],
-            airport["country"],
-            gadm_level,
-            snakemake.input.shapes_path,
-            snakemake.config["cluster_options"]["alternative_clustering"],
-        ),
-        axis=1,
-    )
-    # To change 3 country code to 2
-    # airports["gadm_{}".format(gadm_level)] = airports["gadm_{}".format(gadm_level)].apply(
-    # lambda cocode: three_2_two_digits_country(cocode[:3]) + " " + cocode[4:-2])
-
-    airports = airports.set_index("gadm_{}".format(gadm_level))
+    airports = locate_bus(
+        airports,
+        countries,
+        gadm_level,
+        snakemake.input.shapes_path,
+        snakemake.config["cluster_options"]["alternative_clustering"],
+    ).set_index("gadm_{}".format(gadm_level))
 
     ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"])
 
@@ -1304,18 +1296,13 @@ def add_shipping(n, costs):
         options["shipping_hydrogen_share"], demand_sc + "_" + str(investment_year)
     )
 
-    ports["gadm_{}".format(gadm_level)] = ports[["x", "y", "country"]].apply(
-        lambda port: locate_bus(
-            port[["x", "y"]],
-            port["country"],
-            gadm_level,
-            snakemake.input["shapes_path"],
-            snakemake.config["cluster_options"]["alternative_clustering"],
-        ),
-        axis=1,
-    )
-
-    ports = ports.set_index("gadm_{}".format(gadm_level))
+    ports = locate_bus(
+        ports,
+        countries,
+        gadm_level,
+        snakemake.input.shapes_path,
+        snakemake.config["cluster_options"]["alternative_clustering"],
+    ).set_index("gadm_{}".format(gadm_level))
 
     ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"])
     ind = ind.set_index(n.buses.index[n.buses.carrier == "AC"])