code: new changes based on upstream
finozzifa committed Nov 14, 2024
1 parent 92ab8f6 commit 196d255
Showing 12 changed files with 63 additions and 82 deletions.
18 changes: 11 additions & 7 deletions scripts/_helpers.py
@@ -1408,6 +1408,7 @@ def locate_bus(
gadm_url_prefix,
gadm_input_file_args,
contended_flag,
col_name="name",
path_to_gadm=None,
update=False,
out_logging=False,
@@ -1445,8 +1446,9 @@ def locate_bus(
out_logging = true, enables output logging
gadm_clustering : bool
gadm_cluster = true, to enable clustering
col_name: str
column to use to filter the GeoDataFrame
"""
col = "name"
if not gadm_clustering:
gdf = gpd.read_file(path_to_gadm)
else:
@@ -1473,24 +1475,24 @@
update,
out_logging,
)
col = "GID_{}".format(gadm_level)
col_name = "GID_{}".format(gadm_level)

# gdf.set_index("GADM_ID", inplace=True)
gdf_co = gdf[
gdf[col].str.contains(co)
gdf[col_name].str.contains(co)
] # GeoDataFrame of the entire continent (output of the previous step),
# filtered to shapes whose col_name value contains the country code co (e.g. "MA")
point = Point(coords["x"], coords["y"]) # point object

try:
return gdf_co[gdf_co.contains(point)][
col
col_name
].item() # return the col_name value (the bus region) of the shape containing the point

except ValueError:
return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][
col
col_name
].item() # fall back to the closest shape when no shape contains the point
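
For orientation, the lookup pattern this hunk touches — point-in-polygon with a nearest-shape fallback — can be sketched as below. The function name and inputs are illustrative only, not part of the commit:

import geopandas as gpd
from shapely.geometry import Point

def match_point_to_shape(gdf_co: gpd.GeoDataFrame, point: Point, col_name: str):
    # Preferred path: the shape that actually contains the point.
    containing = gdf_co[gdf_co.contains(point)]
    if len(containing) == 1:
        return containing[col_name].item()
    # Fallback (the except ValueError branch above): nearest shape by distance.
    nearest = min(gdf_co.geometry, key=point.distance)
    return gdf_co[gdf_co.geometry == nearest][col_name].item()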


@@ -1566,7 +1568,9 @@ def aggregate_fuels(sector):
]

oil_fuels = [
"additives and oxygenates" "aviation gasoline" "bitumen",
"additives and oxygenates",
"aviation gasoline",
"bitumen",
"conventional crude oil",
"crude petroleum",
"ethane",
@@ -1580,7 +1584,7 @@
"naphtha",
"natural gas liquids",
"other kerosene",
"paraffin waxes" "patent fuel",
"paraffin waxes",
"petroleum coke",
"refinery gas",
]
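
Both hunks in aggregate_fuels fix the same subtle bug: Python concatenates adjacent string literals at parse time, so a missing comma silently merges two list entries instead of raising an error. A minimal illustration (not from the repo):

# A missing comma fuses two literals into one mangled entry:
broken = ["paraffin waxes" "patent fuel", "petroleum coke"]
assert broken == ["paraffin waxespatent fuel", "petroleum coke"]

# With the comma restored, the list has the intended three entries:
fixed = ["paraffin waxes", "patent fuel", "petroleum coke"]
assert len(fixed) == 3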
6 changes: 2 additions & 4 deletions scripts/build_base_industry_totals.py
@@ -8,8 +8,6 @@
@author: user
"""

import pathlib

import country_converter as coco
import pandas as pd
from _helpers import (
@@ -117,11 +115,11 @@ def create_industry_base_totals(df):
clean_industry_list = list(transaction.clean_name.unique())

unsd_path = get_path(
pathlib.Path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/"
get_path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/"
)

# Get the files from the path provided in the OP
all_files = list(pathlib.Path(unsd_path).glob("*.txt"))
all_files = list(get_path(unsd_path).glob("*.txt"))

# Create a dataframe from all downloaded files
df = pd.concat(
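This commit systematically replaces direct pathlib.Path(...) calls with the project's get_path helper, imported from _helpers. The helper's definition is not part of this diff; judging from call sites such as get_path(unsd_path).glob("*.txt") and get_path(BASE_DIR, "data", ...), it presumably joins its arguments into a pathlib.Path, roughly:

import pathlib

def get_path(*args) -> pathlib.Path:
    # Sketch only; the real implementation lives in scripts/_helpers.py.
    # Joining all fragments into one Path gives callers .glob(), .exists(),
    # .parent, .unlink(), etc. through a single helper.
    return pathlib.Path(*args)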
6 changes: 3 additions & 3 deletions scripts/build_demand_profiles.py
@@ -124,12 +124,12 @@ def get_load_paths_gegis(ssp_parentfolder, config):
for continent in region_load:
sel_ext = ".nc"
for ext in [".nc", ".csv"]:
load_path = get_path(BASE_DIR, load_dir, str(continent) + str(ext))
if pathlib.Path(load_path).exists():
load_path = get_path(BASE_DIR, str(load_dir), str(continent) + str(ext))
if get_path(load_path).exists():
sel_ext = ext
break
file_name = str(continent) + str(sel_ext)
load_path = get_path(load_dir, file_name)
load_path = get_path(str(load_dir), file_name)
load_paths.append(load_path)
file_names.append(file_name)
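
For context, the loop above probes each continent's load file with a .nc extension first, then .csv, keeping the first that exists. A compact standalone equivalent (hypothetical name, sketch only):

import pathlib

def first_existing(load_dir: str, continent: str, exts=(".nc", ".csv")) -> pathlib.Path:
    # Return the first candidate that exists on disk; default to the .nc name.
    for ext in exts:
        candidate = pathlib.Path(load_dir, continent + ext)
        if candidate.exists():
            return candidate
    return pathlib.Path(load_dir, continent + exts[0])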

5 changes: 1 addition & 4 deletions scripts/build_natura_raster.py
@@ -45,7 +45,6 @@
The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`.
"""
import os
import pathlib

import atlite
import geopandas as gpd
@@ -68,9 +67,7 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)):

list_fileshapes = []
for lf in list_paths:
if pathlib.Path(
lf
).is_dir(): # if it is a folder, then list all shapes files contained
if get_path(lf).is_dir(): # if it is a folder, then list all shapes files contained
# loop over all dirs and subdirs
for path, subdirs, files in os.walk(lf):
# loop over all files
16 changes: 7 additions & 9 deletions scripts/build_shapes.py
@@ -6,7 +6,6 @@
# -*- coding: utf-8 -*-

import multiprocessing as mp
import pathlib
import shutil
from itertools import takewhile
from operator import attrgetter
@@ -22,7 +21,6 @@
BASE_DIR,
configure_logging,
create_logger,
get_current_directory_path,
get_gadm_layer,
get_path,
mock_snakemake,
@@ -117,7 +115,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0


def save_to_geojson(df, fn):
pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists
get_path(fn).unlink(missing_ok=True) # remove file if it exists
if not isinstance(df, gpd.GeoDataFrame):
df = gpd.GeoDataFrame(dict(geometry=df))

@@ -139,9 +137,9 @@ def load_eez(countries_codes, geo_crs, eez_gpkg_file="./data/eez/eez_v11.gpkg"):
The dataset shall be downloaded independently by the user (see
guide) or together with pypsa-earth package.
"""
if not pathlib.Path(eez_gpkg_file).exists():
if not get_path(eez_gpkg_file).exists():
raise Exception(
f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(eez_gpkg).parent}"
f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_path(eez_gpkg).parent}"
)

geodf_EEZ = gpd.read_file(eez_gpkg_file, engine="pyogrio").to_crs(geo_crs)
@@ -303,7 +301,7 @@ def download_WorldPop_standard(
BASE_DIR, "data", "WorldPop", WorldPop_filename
) # Input filepath tif

if not pathlib.Path(WorldPop_inputfile).exists() or update is True:
if not get_path(WorldPop_inputfile).exists() or update is True:
if out_logging:
logger.warning(
f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}"
@@ -395,9 +393,9 @@ def convert_gdp(name_file_nc, year=2015, out_logging=False):
) # Input filepath nc

# Check if file exists, otherwise throw exception
if not pathlib.Path(GDP_nc).exists():
if not get_path(GDP_nc).exists():
raise Exception(
f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}"
f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_path(GDP_nc).parent}"
)

# open nc dataset
@@ -439,7 +437,7 @@ def load_gdp(
BASE_DIR, "data", "GDP", name_file_tif
) # Input filepath tif

if update | (not pathlib.Path(GDP_tif).exists()):
if update | (not get_path(GDP_tif).exists()):
if out_logging:
logger.warning(
f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}"
4 changes: 2 additions & 2 deletions scripts/cluster_network.py
@@ -121,7 +121,6 @@
:align: center
"""

import pathlib
from functools import reduce

import geopandas as gpd
@@ -134,6 +133,7 @@
configure_logging,
create_logger,
get_aggregation_strategies,
get_path,
mock_snakemake,
normed,
update_p_nom_max,
@@ -626,7 +626,7 @@ def clustering_for_n_clusters(


def save_to_geojson(s, fn):
pathlib.Path(fn).unlink(missing_ok=True)
get_path(fn).unlink(missing_ok=True)
df = s.reset_index()
schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"}
df.to_file(fn, driver="GeoJSON", schema=schema)
4 changes: 1 addition & 3 deletions scripts/download_osm_data.py
@@ -26,14 +26,12 @@
- ``data/osm/out``: Prepared power data as .geojson and .csv files per country
- ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files
"""
import pathlib
import shutil

from _helpers import (
BASE_DIR,
configure_logging,
create_logger,
get_current_directory_path,
get_path,
mock_snakemake,
read_osm_config,
@@ -135,7 +133,7 @@ def convert_iso_to_geofk(
for name in names:
for f in out_formats:
new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}")
old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}"))
old_files = list(get_path(out_path).glob(f"*{name}.{f}"))
# if file is missing, create empty file, otherwise rename it and move it
if not old_files:
with open(new_file_name, "w") as f:
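
The truncated block above either moves existing per-country outputs under the aggregated name or creates empty placeholders; a self-contained sketch of that pattern (hypothetical names, not the commit's code):

import pathlib
import shutil

def collect_or_touch(out_path: str, store_path: str, name: str, fmt: str) -> None:
    new_file = pathlib.Path(store_path, f"all_raw_{name}s.{fmt}")
    old_files = list(pathlib.Path(out_path).glob(f"*{name}.{fmt}"))
    if not old_files:
        new_file.touch()  # empty placeholder so downstream rules still find a file
    else:
        shutil.move(str(old_files[0]), new_file)  # rename and move the existing output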
19 changes: 9 additions & 10 deletions scripts/make_statistics.py
@@ -24,8 +24,6 @@
This rule creates a dataframe containing in the columns the relevant statistics for the current run.
"""

import pathlib

import geopandas as gpd
import numpy as np
import pandas as pd
@@ -34,6 +32,7 @@
from _helpers import (
create_country_list,
create_logger,
get_path,
get_path_size,
mock_snakemake,
three_2_two_digits_country,
@@ -127,7 +126,7 @@ def collect_basic_osm_stats(path, rulename, header):
"""
Collect basic statistics on OSM data: number of items
"""
if pathlib.Path(path).is_file() and get_path_size(path) > 0:
if get_path(path).is_file() and get_path_size(path) > 0:
df = gpd.read_file(path)
n_elem = len(df)

@@ -146,7 +145,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"):
- length of the stored shapes
- length of objects with tag_frequency == 0 (DC elements)
"""
if pathlib.Path(path).is_file() and get_path_size(path) > 0:
if get_path(path).is_file() and get_path_size(path) > 0:
df = gpd.read_file(path)
n_elem = len(df)
obj_length = (
@@ -248,7 +247,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"):

df = pd.DataFrame()

if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file():
if get_path(fp_onshore).is_file() and get_path(fp_offshore).is_file():
gdf_onshore = gpd.read_file(fp_onshore)
gdf_offshore = gpd.read_file(fp_offshore)

@@ -290,7 +289,7 @@ def capacity_stats(df):
else:
return df.groupby("carrier").p_nom.sum().astype(float)

if pathlib.Path(network_path).is_file():
if get_path(network_path).is_file():
n = pypsa.Network(network_path)

lines_length = float((n.lines.length * n.lines.num_parallel).sum())
@@ -345,7 +344,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"):
"""
snakemake = _mock_snakemake(rulename)

if not pathlib.Path(snakemake.output.africa_shape).is_file():
if not get_path(snakemake.output.africa_shape).is_file():
return pd.DataFrame()

df_continent = gpd.read_file(snakemake.output.africa_shape)
@@ -356,7 +355,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"):
.geometry.area.iloc[0]
)

if not pathlib.Path(snakemake.output.gadm_shapes).is_file():
if not get_path(snakemake.output.gadm_shapes).is_file():
return pd.DataFrame()

df_gadm = gpd.read_file(snakemake.output.gadm_shapes)
@@ -470,7 +469,7 @@ def collect_renewable_stats(rulename, technology):
"""
snakemake = _mock_snakemake(rulename, technology=technology)

if pathlib.Path(snakemake.output.profile).is_file():
if get_path(snakemake.output.profile).is_file():
res = xr.open_dataset(snakemake.output.profile)

if technology == "hydro":
@@ -503,7 +502,7 @@ def add_computational_stats(df, snakemake, column_name=None):
comp_data = [np.nan] * 3 # total_time, mean_load and max_memory

if snakemake.benchmark:
if not pathlib.Path(snakemake.benchmark).is_file():
if not get_path(snakemake.benchmark).is_file():
return df

bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t")
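
Like get_path, the get_path_size helper used throughout this file comes from _helpers and its definition is not shown in the diff; presumably it returns a file size in bytes, along the lines of:

import pathlib

def get_path_size(path) -> int:
    # Sketch only: size in bytes of the file at `path`,
    # used above to skip empty OSM outputs.
    return pathlib.Path(path).stat().st_size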
4 changes: 1 addition & 3 deletions scripts/make_summary.py
@@ -52,8 +52,6 @@
Replacing *summaries* with *plots* creates nice colored maps of the results.
"""

import pathlib

import pandas as pd
import pypsa
from _helpers import (
@@ -503,7 +501,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all"

for label, filename in networks_dict.items():
print(label, filename)
if not pathlib.Path(filename).exists():
if not get_path(filename).exists():
print("does not exist!!")
continue

6 changes: 3 additions & 3 deletions scripts/monte_carlo.py
@@ -112,7 +112,7 @@ def monte_carlo_sampling_pydoe2(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
@@ -142,7 +142,7 @@ def monte_carlo_sampling_chaospy(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
@@ -185,7 +185,7 @@ def monte_carlo_sampling_scipy(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
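
All three monte_carlo.py hunks fix the same bug: logger.info is not print(). Extra positional arguments are applied to the message with %-formatting when the record is emitted, and a message with no % placeholders then produces a formatting error instead of logging the value. A minimal before/after sketch:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
discrepancy = 0.0123

# Broken: emits "--- Logging error ---" because the extra arguments
# cannot be %-merged into a message that has no placeholders.
logger.info("Discrepancy is:", discrepancy, " more details in function documentation.")

# Fixed, as in this commit (f-string), or the lazy %-style equivalent:
logger.info(f"Discrepancy is: {discrepancy} more details in function documentation.")
logger.info("Discrepancy is: %s more details in function documentation.", discrepancy)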