From 3f9a5b72dafb964e2f8dd59010db40d1275b81ee Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 00:05:46 +0200 Subject: [PATCH 01/23] _helpers.py os to pathlib --- Snakefile | 2 +- doc/conf.py | 6 +- scripts/_helpers.py | 172 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 144 insertions(+), 36 deletions(-) diff --git a/Snakefile b/Snakefile index ac8724ef3..32c92ba5b 100644 --- a/Snakefile +++ b/Snakefile @@ -6,7 +6,7 @@ import sys sys.path.append("./scripts") -from os.path import normpath, exists, isdir +from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider diff --git a/doc/conf.py b/doc/conf.py index 8bd5c798a..fdf42ba93 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -12,16 +12,16 @@ # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. +# documentation root, use pathlib.Path.absolute to make it absolute, like shown here. # import datetime -import os +import pathlib import shutil import sys from git import Repo -sys.path.insert(0, os.path.abspath("../scripts")) +sys.path.insert(0, str(pathlib.Path("../scripts").absolute())) for p in sys.path: print(p) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 4fdf000b9..9d88c26b7 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -7,9 +7,9 @@ import logging import os +import pathlib import subprocess import sys -from pathlib import Path import country_converter as coco import geopandas as gpd @@ -96,12 +96,12 @@ def read_osm_config(*args): {"Africa": {"DZ": "algeria", ...}, ...} """ if "__file__" in globals(): - base_folder = os.path.dirname(__file__) - if not os.path.exists(os.path.join(base_folder, "configs")): - base_folder = os.path.dirname(base_folder) + base_folder = get_dirname_path(__file__) + if not path_exists(get_path(base_folder, "configs")): + base_folder = get_dirname_path(base_folder) else: - base_folder = os.getcwd() - osm_config_path = os.path.join(base_folder, "configs", REGIONS_CONFIG) + base_folder = get_current_directory_path() + osm_config_path = get_path(base_folder, "configs", REGIONS_CONFIG) with open(osm_config_path, "r") as f: osm_config = yaml.safe_load(f) if len(args) == 0: @@ -132,8 +132,8 @@ def sets_path_to_root(root_directory_name): while n >= 0: n -= 1 # if repo_name is current folder name, stop and set path - if repo_name == os.path.basename(os.path.abspath(".")): - repo_path = os.getcwd() # os.getcwd() = current_path + if repo_name == get_basename_abs_path("."): + repo_path = get_current_directory_path() # current_path os.chdir(repo_path) # change dir_path to repo_path print("This is the repository path: ", repo_path) print("Had to go %d folder(s) up." % (n0 - 1 - n)) @@ -143,8 +143,7 @@ def sets_path_to_root(root_directory_name): print("Can't find the repo path.") # if repo_name NOT current folder name, go one directory higher else: - upper_path = os.path.dirname(os.path.abspath(".")) # name of upper folder - os.chdir(upper_path) + change_to_script_dir(".") # change to the upper folder def configure_logging(snakemake, skip_handlers=False): @@ -171,8 +170,8 @@ def configure_logging(snakemake, skip_handlers=False): kwargs.setdefault("level", "INFO") if skip_handlers is False: - fallback_path = Path(__file__).parent.joinpath( - "..", "logs", f"{snakemake.rule}.log" + fallback_path = get_path( + get_dirname_path(__file__), "..", "logs", f"{snakemake.rule}.log" ) logfile = snakemake.log.get( "python", snakemake.log[0] if snakemake.log else fallback_path @@ -249,7 +248,7 @@ def pdbcast(v, h): def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True ): import pypsa from add_electricity import load_costs, update_transmission_costs @@ -260,7 +259,7 @@ def load_network_for_plots( n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) ) n.lines["carrier"] = "AC line" n.transformers["carrier"] = "AC transformer" @@ -331,8 +330,8 @@ def aggregate_p_curtailed(n): [ ( ( - n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt) - - n.generators_t.p.sum() + n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt) + - n.generators_t.p.sum() ) .groupby(n.generators.carrier) .sum() @@ -358,7 +357,7 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): costs = {} for c, (p_nom, p_attr) in zip( - n.iterate_components(components.keys(), skip_empty=False), components.values() + n.iterate_components(components.keys(), skip_empty=False), components.values() ): if c.df.empty: continue @@ -390,7 +389,7 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): def progress_retrieve( - url, file, data=None, headers=None, disable_progress=False, roundto=1.0 + url, file, data=None, headers=None, disable_progress=False, roundto=1.0 ): """ Function to download data from a url with a progress bar progress in @@ -472,19 +471,18 @@ def mock_snakemake(rulename, **wildcards): keyword arguments fixing the wildcards. Only necessary if wildcards are needed. """ - import os import snakemake as sm from pypsa.descriptors import Dict from snakemake.script import Snakemake - script_dir = Path(__file__).parent.resolve() + script_dir = pathlib.Path(__file__).parent.resolve() assert ( - Path.cwd().resolve() == script_dir + pathlib.Path.cwd().resolve() == script_dir ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: - if os.path.exists(p): + if path_exists(p): snakefile = p break workflow = sm.Workflow( @@ -508,7 +506,7 @@ def mock_snakemake(rulename, **wildcards): def make_accessable(*ios): for io in ios: for i in range(len(io)): - io[i] = os.path.abspath(io[i]) + io[i] = get_abs_path(io[i]) make_accessable(job.input, job.output, job.log) snakemake = Snakemake( @@ -527,7 +525,7 @@ def make_accessable(*ios): # create log and output dir if not existent for path in list(snakemake.log) + list(snakemake.output): - Path(path).parent.mkdir(parents=True, exist_ok=True) + build_directory(path) os.chdir(script_dir) return snakemake @@ -636,8 +634,8 @@ def country_name_2_two_digits(country_name): 2-digit country name """ if ( - country_name - == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" + country_name + == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" ): return "SN-GM" @@ -652,7 +650,7 @@ def read_csv_nafix(file, **kwargs): if "na_values" not in kwargs: kwargs["na_values"] = NA_VALUES - if os.stat(file).st_size > 0: + if get_path_size(file) > 0: return pd.read_csv(file, **kwargs) else: return pd.DataFrame() @@ -670,8 +668,7 @@ def to_csv_nafix(df, path, **kwargs): def save_to_geojson(df, fn): - if os.path.exists(fn): - os.unlink(fn) # remove file if it exists + pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists # save file if the (Geo)DataFrame is non-empty if df.empty: @@ -701,7 +698,7 @@ def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): CRS of the GeoDataFrame """ # if the file is non-zero, read the geodataframe and return it - if os.path.getsize(fn) > 0: + if get_path_size(fn) > 0: return gpd.read_file(fn) else: # else return an empty GeoDataFrame @@ -747,7 +744,7 @@ def filter_codes(c_list, iso_coding=True): selected(iso_coding=False), ignore iso-specific names. """ if ( - iso_coding + iso_coding ): # if country lists are in iso coding, then check if they are 2-string # 2-code countries ret_list = [c for c in c_list if len(c) == 2] @@ -805,7 +802,7 @@ def get_last_commit_message(path): """ _logger = logging.getLogger(__name__) last_commit_message = None - backup_cwd = os.getcwd() + backup_cwd = get_current_directory_path() try: os.chdir(path) last_commit_message = ( @@ -821,3 +818,114 @@ def get_last_commit_message(path): os.chdir(backup_cwd) return last_commit_message + + +def get_dirname_path(path): + """ + It returns the directory name of the path. + """ + return pathlib.Path(path).parent + + +def get_abs_path(path): + """ + It returns the absolutized version of the path. + """ + return pathlib.Path(path).absolute() + + +def get_basename_abs_path(path): + """ + It returns the base name of a normalized and absolutized version of the + path. + """ + return pathlib.Path(path).absolute().name + + +def get_basename_path(path): + """ + It returns the base name of the path. + """ + return pathlib.Path(path).name + + +def get_path(*args): + """ + It returns a new path string. + """ + return pathlib.Path(*args) + + +def get_path_size(path): + """ + It returns the size of a path (in bytes) + """ + return pathlib.Path(path).stat().st_size + + +def build_directory(path): + """ + It creates recursively the directory and its leaf directories. + Parameters: + path (str): The path to the file + """ + + # Check if the provided path points to a directory + if is_directory_path(path): + pathlib.Path(path).mkdir(parents=True, exist_ok=True) + else: + pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True) + + +def change_to_script_dir(path): + """ + Change the current working directory to the directory containing the given + script. + Parameters: + path (str): The path to the file. + """ + + # Get the absolutized and normalized path of directory containing the file + directory_path = pathlib.Path(path).absolute().parent + + # Change the current working directory to the script directory + os.chdir(directory_path) + + +def get_current_directory_path(): + """ + It returns the current directory path. + """ + return pathlib.Path.cwd() + + +def is_directory_path(path): + """ + It returns True if the path points to a directory. + False otherwise. + """ + return pathlib.Path(path).is_dir() + + +def is_file_path(path): + """ + It returns True if the path points to a file. + False otherwise. + """ + return pathlib.Path(path).is_file() + + +def get_relative_path(path, start_path="."): + """ + It returns a relative path to path from start_path. + Default for start_path is the current directory + """ + return pathlib.Path(path).relative_to(start_path) + + +def path_exists(path): + """ + It returns True if the path exists. + False otherwise. + """ + return pathlib.Path(path).exists() From a3b8022d8f144e1e6d68e66715487aa373bacb89 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 11:22:12 +0200 Subject: [PATCH 02/23] os to pathlib for add_electricity, add_extra_components, augmented_line_connectetions, base_network, build_bus_regions --- scripts/add_electricity.py | 12 ++++++++---- scripts/add_extra_components.py | 6 +++--- scripts/augmented_line_connections.py | 5 ++--- scripts/base_network.py | 15 ++++++++++----- scripts/build_bus_regions.py | 5 ++--- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 0911588f7..19360c9c6 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -84,14 +84,18 @@ - additional open- and combined-cycle gas turbines (if ``OCGT`` and/or ``CCGT`` is listed in the config setting ``electricity: extendable_carriers``) """ -import os - import numpy as np import pandas as pd import powerplantmatching as pm import pypsa import xarray as xr -from _helpers import configure_logging, create_logger, read_csv_nafix, update_p_nom_max +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + read_csv_nafix, + update_p_nom_max, +) from powerplantmatching.export import map_country_bus idx = pd.IndexSlice @@ -809,7 +813,7 @@ def add_nice_carrier_names(n, config): if "snakemake" not in globals(): from _helpers import mock_snakemake, sets_path_to_root - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("add_electricity") sets_path_to_root("pypsa-earth") configure_logging(snakemake) diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 29c57e60c..94023ad89 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -52,12 +52,12 @@ - ``Stores`` of carrier 'H2' and/or 'battery' in combination with ``Links``. If this option is chosen, the script adds extra buses with corresponding carrier where energy ``Stores`` are attached and which are connected to the corresponding power buses via two links, one each for charging and discharging. This leads to three investment variables for the energy capacity, charging and discharging capacity of the storage unit. """ -import os + import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger from add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, @@ -267,7 +267,7 @@ def attach_hydrogen_pipelines(n, costs, config): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("add_extra_components", simpl="", clusters=10) configure_logging(snakemake) diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index 3b0072457..c6f9520be 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -28,13 +28,12 @@ Description ----------- """ -import os import networkx as nx import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger from add_electricity import load_costs from networkx.algorithms import complement from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation @@ -54,7 +53,7 @@ def haversine(p): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "augmented_line_connections", network="elec", simpl="", clusters="54" ) diff --git a/scripts/base_network.py b/scripts/base_network.py index 04e0c388d..0f997078b 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -55,7 +55,6 @@ Description ----------- """ -import os import geopandas as gpd import networkx as nx @@ -65,7 +64,13 @@ import scipy as sp import shapely.prepared import shapely.wkt -from _helpers import configure_logging, create_logger, read_csv_nafix +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path_size, + read_csv_nafix, +) from shapely.ops import unary_union logger = create_logger(__name__) @@ -202,7 +207,7 @@ def _load_lines_from_osm(fp_osm_lines): # TODO Seems to be not needed anymore def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config): # the links file can be empty - if os.path.getsize(fp_osm_converters) == 0: + if get_path_size(fp_osm_converters) == 0: links = pd.DataFrame() return links @@ -231,7 +236,7 @@ def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config def _load_converters_from_osm(fp_osm_converters, buses): # the links file can be empty - if os.path.getsize(fp_osm_converters) == 0: + if get_path_size(fp_osm_converters) == 0: converters = pd.DataFrame() return converters @@ -556,7 +561,7 @@ def base_network( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("base_network") configure_logging(snakemake) diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index d1e4f5e3c..42f6b6b61 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -42,12 +42,11 @@ Description ----------- """ -import os import geopandas as gpd import pandas as pd import pypsa -from _helpers import REGION_COLS, configure_logging, create_logger +from _helpers import REGION_COLS, change_to_script_dir, configure_logging, create_logger logger = create_logger(__name__) @@ -150,7 +149,7 @@ def get_gadm_shape( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_bus_regions") configure_logging(snakemake) From 8c118956b60a089c12f72d2937a83ae18c70c915 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 12:07:26 +0200 Subject: [PATCH 03/23] re-formatting --- scripts/build_cutout.py | 5 ++--- scripts/build_demand_profiles.py | 14 ++++++++++---- scripts/build_natura_raster.py | 16 ++++++++++++---- scripts/build_osm_network.py | 12 +++++------- scripts/build_powerplants.py | 14 ++++++++------ scripts/build_renewable_profiles.py | 10 +++++++--- 6 files changed, 44 insertions(+), 27 deletions(-) diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 06e5a24cd..83f5e1509 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -93,12 +93,11 @@ ----------- """ -import os import atlite import geopandas as gpd import pandas as pd -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger logger = create_logger(__name__) @@ -107,7 +106,7 @@ if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5") configure_logging(snakemake) diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index c5dad677b..ebb230903 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -40,7 +40,7 @@ Then with a function that takes in the PyPSA network "base.nc", region and gadm shape data, the countries of interest, a scale factor, and the snapshots, it returns a csv file called "demand_profiles.csv", that allocates the load to the buses of the network according to GDP and population. """ -import os + from itertools import product import geopandas as gpd @@ -49,7 +49,13 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from _helpers import configure_logging, create_logger, read_osm_config +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + read_osm_config, +) from shapely.prepared import prep from shapely.validation import make_valid @@ -108,7 +114,7 @@ def get_load_paths_gegis(ssp_parentfolder, config): load_paths = [] for continent in region_load: - load_path = os.path.join( + load_path = get_path( ssp_parentfolder, str(ssp), str(prediction_year), @@ -246,7 +252,7 @@ def upsample(cntry, group): if "snakemake" not in globals(): from _helpers import mock_snakemake, sets_path_to_root - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_demand_profiles") sets_path_to_root("pypsa-earth") configure_logging(snakemake) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index 9593f7767..ae9fd478a 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -50,7 +50,13 @@ import geopandas as gpd import numpy as np import rasterio as rio -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + is_directory_path, +) from rasterio.features import geometry_mask from rasterio.warp import transform_bounds @@ -65,14 +71,16 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)): list_fileshapes = [] for lf in list_paths: - if os.path.isdir(lf): # if it is a folder, then list all shapes files contained + if is_directory_path( + lf + ): # if it is a folder, then list all shapes files contained # loop over all dirs and subdirs for path, subdirs, files in os.walk(lf): # loop over all files for subfile in files: # add the subfile if it is a shape file if subfile.endswith(accepted_formats): - list_fileshapes.append(os.path.join(path, subfile)) + list_fileshapes.append(str(get_path(path, subfile))) elif lf.endswith(accepted_formats): list_fileshapes.append(lf) @@ -178,7 +186,7 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"] ) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 1ebef49e9..25f8d7a9c 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -5,12 +5,12 @@ # -*- coding: utf-8 -*- -import os - import geopandas as gpd import numpy as np import pandas as pd from _helpers import ( + build_directory, + change_to_script_dir, configure_logging, create_logger, read_geojson, @@ -875,16 +875,14 @@ def built_network( logger.info("Save outputs") # create clean directory if not already exist - if not os.path.exists(outputs["lines"]): - os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True) + build_directory(outputs["lines"]) to_csv_nafix(lines, outputs["lines"]) # Generate CSV to_csv_nafix(converters, outputs["converters"]) # Generate CSV to_csv_nafix(transformers, outputs["transformers"]) # Generate CSV # create clean directory if not already exist - if not os.path.exists(outputs["substations"]): - os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True) + build_directory(outputs["substations"]) # Generate CSV to_csv_nafix(buses, outputs["substations"]) @@ -895,7 +893,7 @@ def built_network( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_osm_network") configure_logging(snakemake) diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 71b701c1b..b61331241 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -100,8 +100,6 @@ 4. OSM extraction was supposed to be ignoring non-generation features like CHP and Natural Gas storage (in contrast to PPM). """ -import os - import geopandas as gpd import numpy as np import pandas as pd @@ -109,8 +107,12 @@ import pypsa import yaml from _helpers import ( + change_to_script_dir, configure_logging, create_logger, + get_current_directory_path, + get_path, + get_path_size, read_csv_nafix, to_csv_nafix, two_digits_2_name_country, @@ -122,7 +124,7 @@ def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm): - if os.stat(filepath_ppl_osm).st_size == 0: + if get_path_size(filepath_ppl_osm) == 0: return to_csv_nafix(pd.DataFrame(), filepath_ppl_pm, index=False) add_ppls = read_csv_nafix(filepath_ppl_osm, index_col=0, dtype={"bus": "str"}) @@ -298,7 +300,7 @@ def replace_natural_gas_technology(df: pd.DataFrame): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_powerplants") configure_logging(snakemake) @@ -325,8 +327,8 @@ def replace_natural_gas_technology(df: pd.DataFrame): "Please check file configs/powerplantmatching_config.yaml" ) logger.info("Parsing OSM generator data to powerplantmatching format") - config["EXTERNAL_DATABASE"]["fn"] = os.path.join( - os.getcwd(), filepath_osm2pm_ppl + config["EXTERNAL_DATABASE"]["fn"] = get_path( + get_current_directory_path(), filepath_osm2pm_ppl ) else: # create an empty file diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index eb79f5752..d2d211aca 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -191,7 +191,6 @@ reached. """ import functools -import os import time from math import isnan @@ -202,7 +201,12 @@ import pandas as pd import progressbar as pgb import xarray as xr -from _helpers import configure_logging, create_logger, sets_path_to_root +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + sets_path_to_root, +) from add_electricity import load_powerplants from dask.distributed import Client, LocalCluster from pypsa.geo import haversine @@ -488,7 +492,7 @@ def create_scaling_factor( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_renewable_profiles", technology="solar") sets_path_to_root("pypsa-earth") configure_logging(snakemake) From 04378db63060de2f195f2ccd2736c7ea066f2dc4 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 12:32:22 +0200 Subject: [PATCH 04/23] os to pathlib build_shapes.py, build_test_configs.py, clean_osm_data.py, cluster_network.py, download_osm_data.py, make_statistics.py --- scripts/build_shapes.py | 57 ++++++++++++++++++++--------------- scripts/build_test_configs.py | 9 +++--- scripts/clean_osm_data.py | 14 ++++----- scripts/cluster_network.py | 8 ++--- scripts/download_osm_data.py | 29 +++++++++++------- scripts/make_statistics.py | 32 ++++++++++++-------- 6 files changed, 84 insertions(+), 65 deletions(-) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index cc883ef9e..77a97b669 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -6,7 +6,7 @@ # -*- coding: utf-8 -*- import multiprocessing as mp -import os +import pathlib import shutil from itertools import takewhile from operator import attrgetter @@ -19,8 +19,14 @@ import requests import xarray as xr from _helpers import ( + build_directory, + change_to_script_dir, configure_logging, create_logger, + get_current_directory_path, + get_dirname_path, + get_path, + path_exists, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, @@ -88,21 +94,21 @@ def download_GADM(country_code, update=False, out_logging=False): GADM_filename = get_GADM_filename(country_code) GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg" - GADM_inputfile_gpkg = os.path.join( - os.getcwd(), + GADM_inputfile_gpkg = get_path( + get_current_directory_path(), "data", "gadm", GADM_filename, GADM_filename + ".gpkg", ) # Input filepath gpkg - if not os.path.exists(GADM_inputfile_gpkg) or update is True: + if not path_exists(GADM_inputfile_gpkg) or update is True: if out_logging: logger.warning( f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" ) # create data/osm directory - os.makedirs(os.path.dirname(GADM_inputfile_gpkg), exist_ok=True) + build_directory(GADM_inputfile_gpkg) try: r = requests.get(GADM_url, stream=True, timeout=300) @@ -296,8 +302,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0 def save_to_geojson(df, fn): - if os.path.exists(fn): - os.unlink(fn) # remove file if it exists + pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists if not isinstance(df, gpd.GeoDataFrame): df = gpd.GeoDataFrame(dict(geometry=df)) @@ -319,9 +324,9 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"): The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. """ - if not os.path.exists(EEZ_gpkg): + if not path_exists(EEZ_gpkg): raise Exception( - f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {os.path.dirname(EEZ_gpkg)}" + f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_dirname_path(EEZ_gpkg)}" ) geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs) @@ -479,17 +484,17 @@ def download_WorldPop_standard( f"https://data.worldpop.org/GIS/Population/Global_2000_2020_Constrained/2020/maxar_v1/{two_2_three_digits_country(country_code).upper()}/{WorldPop_filename}", ] - WorldPop_inputfile = os.path.join( - os.getcwd(), "data", "WorldPop", WorldPop_filename + WorldPop_inputfile = get_path( + get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - if not os.path.exists(WorldPop_inputfile) or update is True: + if not path_exists(WorldPop_inputfile) or update is True: if out_logging: logger.warning( f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}" ) # create data/osm directory - os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True) + build_directory(WorldPop_inputfile) loaded = False for WorldPop_url in WorldPop_urls: @@ -533,10 +538,10 @@ def download_WorldPop_API( WorldPop_filename = f"{two_2_three_digits_country(country_code).lower()}_ppp_{year}_UNadj_constrained.tif" # Request to get the file - WorldPop_inputfile = os.path.join( - os.getcwd(), "data", "WorldPop", WorldPop_filename + WorldPop_inputfile = get_path( + get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True) + build_directory(WorldPop_inputfile) year_api = int(str(year)[2:]) loaded = False WorldPop_api_urls = [ @@ -571,17 +576,19 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False): name_file_tif = name_file_nc[:-2] + "tif" # path of the nc file - GDP_nc = os.path.join(os.getcwd(), "data", "GDP", name_file_nc) # Input filepath nc + GDP_nc = get_path( + get_current_directory_path(), "data", "GDP", name_file_nc + ) # Input filepath nc # path of the tif file - GDP_tif = os.path.join( - os.getcwd(), "data", "GDP", name_file_tif + GDP_tif = get_path( + get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath nc # Check if file exists, otherwise throw exception - if not os.path.exists(GDP_nc): + if not path_exists(GDP_nc): raise Exception( - f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {os.path.dirname(GDP_nc)}" + f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_dirname_path(GDP_nc)}" ) # open nc dataset @@ -620,11 +627,11 @@ def load_GDP( # path of the nc file name_file_tif = name_file_nc[:-2] + "tif" - GDP_tif = os.path.join( - os.getcwd(), "data", "GDP", name_file_tif + GDP_tif = get_path( + get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath tif - if update | (not os.path.exists(GDP_tif)): + if update | (not path_exists(GDP_tif)): if out_logging: logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" @@ -1310,7 +1317,7 @@ def gadm( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") configure_logging(snakemake) diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index 349a1ef00..d19d86cb9 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -14,9 +14,8 @@ """ import collections.abc import copy -import os -from pathlib import Path +from _helpers import change_to_script_dir, get_current_directory_path, get_path from ruamel.yaml import YAML @@ -37,7 +36,7 @@ def _parse_inputconfig(input_config, yaml): return input_config if isinstance(input_config, str): - input_config = Path(Path.cwd(), input_config) + input_config = get_path(get_current_directory_path(), input_config) with open(input_config) as fp: return yaml.load(fp) @@ -76,7 +75,7 @@ def create_test_config(default_config, diff_config, output_path): # Output path if isinstance(output_path, str): - output_path = Path(Path.cwd(), output_path) + output_path = get_path(get_current_directory_path(), output_path) # Save file yaml.dump(merged_config, output_path) @@ -88,7 +87,7 @@ def create_test_config(default_config, diff_config, output_path): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_test_configs") # Input paths diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 01b535454..9f7d3ed9a 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -5,16 +5,16 @@ # -*- coding: utf-8 -*- -import os - import geopandas as gpd import numpy as np import pandas as pd import reverse_geocode as rg from _helpers import ( REGION_COLS, + change_to_script_dir, configure_logging, create_logger, + get_path_size, save_to_geojson, to_csv_nafix, ) @@ -902,7 +902,7 @@ def clean_data( ): logger.info("Process OSM lines") - if os.path.getsize(input_files["lines"]) > 0: + if get_path_size(input_files["lines"]) > 0: # Load raw data lines df_lines = load_network_data("lines", data_options) @@ -917,7 +917,7 @@ def clean_data( df_all_lines = df_lines # load cables only if data are stored - if os.path.getsize(input_files["cables"]) > 0: + if get_path_size(input_files["cables"]) > 0: logger.info("Add OSM cables to data") # Load raw data lines df_cables = load_network_data("cables", data_options) @@ -967,7 +967,7 @@ def clean_data( logger.info("Process OSM substations") - if os.path.getsize(input_files["substations"]) > 0: + if get_path_size(input_files["substations"]) > 0: df_all_substations = load_network_data("substations", data_options) # prepare dataset for substations @@ -1027,7 +1027,7 @@ def clean_data( logger.info("Process OSM generators") - if os.path.getsize(input_files["generators"]) > 0: + if get_path_size(input_files["generators"]) > 0: df_all_generators = gpd.read_file(input_files["generators"]) # prepare the generator dataset @@ -1064,7 +1064,7 @@ def clean_data( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("clean_osm_data") configure_logging(snakemake) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 34b116a99..aa6fc4fa8 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -121,7 +121,7 @@ :align: center """ -import os +import pathlib from functools import reduce import geopandas as gpd @@ -131,6 +131,7 @@ import pypsa from _helpers import ( REGION_COLS, + change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, @@ -633,8 +634,7 @@ def clustering_for_n_clusters( def save_to_geojson(s, fn): - if os.path.exists(fn): - os.unlink(fn) + pathlib.Path(fn).unlink(missing_ok=True) df = s.reset_index() schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"} df.to_file(fn, driver="GeoJSON", schema=schema) @@ -658,7 +658,7 @@ def cluster_regions(busmaps, inputs, output): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "cluster_network", network="elec", simpl="", clusters="min" ) diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index ec99baecd..b822cc574 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,11 +26,17 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files """ -import os +import pathlib import shutil -from pathlib import Path -from _helpers import configure_logging, create_logger, read_osm_config +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_current_directory_path, + get_path, + read_osm_config, +) from earth_osm import eo logger = create_logger(__name__) @@ -94,15 +100,17 @@ def convert_iso_to_geofk( if "snakemake" not in globals(): from _helpers import mock_snakemake, sets_path_to_root - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("download_osm_data") sets_path_to_root("pypsa-earth") configure_logging(snakemake) run = snakemake.config.get("run", {}) RDIR = run["name"] + "/" if run.get("name") else "" - store_path_resources = Path.joinpath(Path().cwd(), "resources", RDIR, "osm", "raw") - store_path_data = Path.joinpath(Path().cwd(), "data", "osm") + store_path_resources = get_path( + get_current_directory_path(), "resources", RDIR, "osm", "raw" + ) + store_path_data = get_path(get_current_directory_path(), "data", "osm") country_list = country_list_to_geofk(snakemake.params.countries) eo.save_osm_data( @@ -117,10 +125,9 @@ def convert_iso_to_geofk( out_aggregate=True, ) - out_path = Path.joinpath(store_path_resources, "out") + out_path = get_path(store_path_resources, "out") names = ["generator", "cable", "line", "substation"] out_formats = ["csv", "geojson"] - new_files = os.listdir(out_path) # list downloaded osm files # earth-osm (eo) only outputs files with content # If the file is empty, it is not created @@ -129,9 +136,9 @@ def convert_iso_to_geofk( # Rename and move osm files to the resources folder output for name in names: for f in out_formats: - new_file_name = Path.joinpath(store_path_resources, f"all_raw_{name}s.{f}") - old_files = list(Path(out_path).glob(f"*{name}.{f}")) - # if file is missing, create empty file, otherwise rename it an move it + new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}") + old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}")) + # if file is missing, create empty file, otherwise rename it and move it if not old_files: with open(new_file_name, "w") as f: pass diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index 5c544b61a..dd410d33d 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -23,15 +23,21 @@ ------- This rule creates a dataframe containing in the columns the relevant statistics for the current run. """ -import os -from pathlib import Path import geopandas as gpd import numpy as np import pandas as pd import pypsa import xarray as xr -from _helpers import create_logger, mock_snakemake, sets_path_to_root, to_csv_nafix +from _helpers import ( + change_to_script_dir, + create_logger, + get_path_size, + is_file_path, + mock_snakemake, + sets_path_to_root, + to_csv_nafix, +) from build_test_configs import create_test_config from shapely.validation import make_valid @@ -43,7 +49,7 @@ def _multi_index_scen(rulename, keys): def _mock_snakemake(rule, **kwargs): - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake(rule, **kwargs) sets_path_to_root("pypsa-earth") return snakemake @@ -123,7 +129,7 @@ def collect_basic_osm_stats(path, rulename, header): """ Collect basic statistics on OSM data: number of items """ - if Path(path).is_file() and Path(path).stat().st_size > 0: + if is_file_path(path) and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) @@ -142,7 +148,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"): - length of the stored shapes - length of objects with tag_frequency == 0 (DC elements) """ - if Path(path).is_file() and Path(path).stat().st_size > 0: + if is_file_path(path) and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) obj_length = ( @@ -244,7 +250,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"): df = pd.DataFrame() - if Path(fp_onshore).is_file() and Path(fp_offshore).is_file(): + if is_file_path(fp_onshore) and is_file_path(fp_offshore): gdf_onshore = gpd.read_file(fp_onshore) gdf_offshore = gpd.read_file(fp_offshore) @@ -286,7 +292,7 @@ def capacity_stats(df): else: return df.groupby("carrier").p_nom.sum().astype(float) - if Path(network_path).is_file(): + if is_file_path(network_path): n = pypsa.Network(network_path) lines_length = float((n.lines.length * n.lines.num_parallel).sum()) @@ -341,7 +347,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): """ snakemake = _mock_snakemake(rulename) - if not Path(snakemake.output.africa_shape).is_file(): + if not is_file_path(snakemake.output.africa_shape): return pd.DataFrame() df_continent = gpd.read_file(snakemake.output.africa_shape) @@ -352,7 +358,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): .geometry.area.iloc[0] ) - if not Path(snakemake.output.gadm_shapes).is_file(): + if not is_file_path(snakemake.output.gadm_shapes): return pd.DataFrame() df_gadm = gpd.read_file(snakemake.output.gadm_shapes) @@ -466,7 +472,7 @@ def collect_renewable_stats(rulename, technology): """ snakemake = _mock_snakemake(rulename, technology=technology) - if Path(snakemake.output.profile).is_file(): + if is_file_path(snakemake.output.profile): res = xr.open_dataset(snakemake.output.profile) if technology == "hydro": @@ -499,7 +505,7 @@ def add_computational_stats(df, snakemake, column_name=None): comp_data = [np.nan] * 3 # total_time, mean_load and max_memory if snakemake.benchmark: - if not Path(snakemake.benchmark).is_file(): + if not is_file_path(snakemake.benchmark): return df bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t") @@ -581,7 +587,7 @@ def calculate_stats( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("make_statistics") sets_path_to_root("pypsa-earth") From 16e6c46d6b13f8f44cbd26116f942fd9d0164629 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 13:10:24 +0200 Subject: [PATCH 05/23] os to pathlib make_summary.py, monte_carlo.py, non_workflow/zip_folder.py, plot_network.py, plot_summary.py, prepare_network.py, retrieve_databundle_light.py, simplify_network.py, solve_network.py --- scripts/make_summary.py | 23 +++--- scripts/monte_carlo.py | 8 +- scripts/non_workflow/zip_folder.py | 7 +- scripts/plot_network.py | 5 +- scripts/plot_summary.py | 7 +- scripts/prepare_network.py | 23 ++++-- scripts/retrieve_databundle_light.py | 105 ++++++++++++++------------- scripts/simplify_network.py | 4 +- scripts/solve_network.py | 13 ++-- 9 files changed, 103 insertions(+), 92 deletions(-) diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 583766ac4..c74938027 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -51,11 +51,16 @@ Replacing *summaries* with *plots* creates nice colored maps of the results. """ -import os import pandas as pd import pypsa -from _helpers import configure_logging +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + get_path, + path_exists, +) from add_electricity import create_logger, load_costs, update_transmission_costs idx = pd.IndexSlice @@ -496,7 +501,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all for label, filename in networks_dict.items(): print(label, filename) - if not os.path.exists(filename): + if not path_exists(filename): print("does not exist!!") continue @@ -527,16 +532,16 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all def to_csv(dfs, dir): - os.makedirs(dir, exist_ok=True) + build_directory(dir) for key, df in dfs.items(): - df.to_csv(os.path.join(dir, f"{key}.csv")) + df.to_csv(get_path(dir, f"{key}.csv")) if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "make_summary", simpl="", @@ -551,9 +556,9 @@ def to_csv(dfs, dir): scenario_name = snakemake.config.get("run", {}).get("name", "") if scenario_name: - network_dir = os.path.join(network_dir, "results", scenario_name, "networks") + network_dir = get_path(network_dir, "results", scenario_name, "networks") else: - network_dir = os.path.join(network_dir, "results", "networks") + network_dir = get_path(network_dir, "results", "networks") configure_logging(snakemake) @@ -569,7 +574,7 @@ def expand_from_wildcard(key): ll = [snakemake.wildcards.ll] networks_dict = { - (simpl, clusters, l, opts): os.path.join( + (simpl, clusters, l, opts): get_path( network_dir, f"elec_s{simpl}_" f"{clusters}_ec_l{l}_{opts}.nc" ) for simpl in expand_from_wildcard("simpl") diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index a448d142b..22f8874f8 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -17,7 +17,7 @@ add_to_snakefile: false # When set to true, enables Monte Carlo sampling samples: 9 # number of optimizations. Note that number of samples when using scipy has to be the square of a prime number sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy", packages that are supported - seed: 42 # set seedling for reproducibilty + seed: 42 # set seedling for reproducibility uncertainties: loads_t.p_set: type: uniform @@ -67,14 +67,13 @@ wildcard {unc}, which is described in the config.yaml and created in the Snakefile as a range from 0 to (total number of) SAMPLES. """ -import os import chaospy import numpy as np import pandas as pd import pypsa import seaborn as sns -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang from sklearn.preprocessing import MinMaxScaler @@ -135,7 +134,6 @@ def monte_carlo_sampling_chaospy( Documentation on Chaospy: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples """ - import chaospy from scipy.stats import qmc # generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: @@ -350,7 +348,7 @@ def validate_parameters( if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "monte_carlo", simpl="", diff --git a/scripts/non_workflow/zip_folder.py b/scripts/non_workflow/zip_folder.py index 0bac2de21..63dbddf24 100644 --- a/scripts/non_workflow/zip_folder.py +++ b/scripts/non_workflow/zip_folder.py @@ -8,9 +8,8 @@ Module to zip the desired folders to be stored in google drive, or equivalent. """ import os +import pathlib import zipfile -from os.path import basename -from xml.etree.ElementInclude import include from _helpers import sets_path_to_root @@ -25,7 +24,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True): for filename in filenames: if filter(filename): # create complete filepath of file in directory - filePath = os.path.join(folderName, filename) + filePath = str(pathlib.Path(folderName, filename)) # path of the zip file if include_parent: @@ -41,7 +40,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True): if __name__ == "__main__": # Set path to this file - os.chdir(os.path.dirname(os.path.abspath(__file__))) + os.chdir(pathlib.Path(__file__).parent.absolute()) # Required to set path to pypsa-earth sets_path_to_root("pypsa-earth") diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 8f2763509..124c6c891 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -17,8 +17,6 @@ ----------- """ -import os - import cartopy.crs as ccrs import geopandas as gpd import matplotlib as mpl @@ -28,6 +26,7 @@ from _helpers import ( aggregate_costs, aggregate_p, + change_to_script_dir, configure_logging, create_logger, load_network_for_plots, @@ -360,7 +359,7 @@ def split_costs(n): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_network", network="elec", diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index 1491b6692..f6f126a47 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -16,11 +16,10 @@ Description ----------- """ -import os import matplotlib.pyplot as plt import pandas as pd -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger, get_path logger = create_logger(__name__) @@ -219,7 +218,7 @@ def plot_energy(infn, snmk, fn=None): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_summary", summary="energy", @@ -241,7 +240,7 @@ def plot_energy(infn, snmk, fn=None): logger.error(f"plotting function for {summary} has not been defined") func( - os.path.join(snakemake.input[0], f"{summary}.csv"), + get_path(snakemake.input[0], f"{summary}.csv"), snakemake, snakemake.output[0], ) diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 3b92cd31d..299d69280 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -56,7 +56,7 @@ for all ``scenario`` s in the configuration file the rule :mod:`prepare_network`. """ -import os +import pathlib import re from zipfile import ZipFile @@ -65,7 +65,13 @@ import pandas as pd import pypsa import requests -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_current_directory_path, + get_path, +) from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -87,13 +93,14 @@ def download_emission_data(): with requests.get(url) as rq: with open("data/co2.zip", "wb") as file: file.write(rq.content) - rootpath = os.getcwd() - file_path = os.path.join(rootpath, "data/co2.zip") + root_path = get_current_directory_path() + file_path = get_path(root_path, "data/co2.zip") with ZipFile(file_path, "r") as zipObj: zipObj.extract( - "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", rootpath + "/data" + "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", + get_path(root_path, "data"), ) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" except: logger.error(f"Failed download resource from '{url}'.") @@ -120,7 +127,7 @@ def emission_extractor(filename, emission_year, country_names): """ # data reading process - datapath = os.path.join(os.getcwd(), "data", filename) + datapath = get_path(get_current_directory_path(), "data", filename) df = pd.read_excel(datapath, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) df.columns = df.iloc[0] df = df.set_index("Country_code_A3") @@ -319,7 +326,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "prepare_network", simpl="", diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 1583cc245..e3b0c191b 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -81,7 +81,7 @@ """ import datetime as dt -import os +import pathlib import re from zipfile import ZipFile @@ -89,9 +89,14 @@ import pandas as pd import yaml from _helpers import ( + change_to_script_dir, configure_logging, create_country_list, create_logger, + get_basename_path, + get_current_directory_path, + get_path, + get_relative_path, progress_retrieve, sets_path_to_root, ) @@ -119,9 +124,9 @@ def load_databundle_config(config): return config -def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_zenodo(config, rootpath, dest_path, hot_run=True, + download_and_unzip_zenodo(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data from zenodo @@ -130,7 +135,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -143,8 +148,8 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["zenodo"] if hot_run: @@ -155,7 +160,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F with ZipFile(file_path, "r") as zipObj: # Extract all the contents of zip file in current directory zipObj.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") except: logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -164,9 +169,9 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F return True -def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_gdrive(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_gdrive(config, rootpath, dest_path, hot_run=True, + download_and_unzip_gdrive(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data from google drive @@ -175,7 +180,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -188,8 +193,8 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["gdrive"] # retrieve file_id from path @@ -216,8 +221,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F # if hot run enabled if hot_run: # remove file - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) # download file from google drive gdd.download_file_from_google_drive( file_id=file_id, @@ -238,10 +242,10 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F def download_and_unzip_protectedplanet( - config, rootpath, attempts=3, hot_run=True, disable_progress=False + config, root_path, attempts=3, hot_run=True, disable_progress=False ): """ - download_and_unzip_protectedplanet(config, rootpath, dest_path, + download_and_unzip_protectedplanet(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data by category from protectedplanet @@ -250,7 +254,7 @@ def download_and_unzip_protectedplanet( ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository attempts : int (default 3) Number of attempts to download the data by month. @@ -266,8 +270,8 @@ def download_and_unzip_protectedplanet( True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile_wpda.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile_wpda.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["protectedplanet"] def get_first_day_of_month(date): @@ -282,8 +286,7 @@ def get_first_day_of_previous_month(date): ) if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) downloaded = False @@ -320,17 +323,17 @@ def get_first_day_of_previous_month(date): for fzip in zip_files: # final path of the file try: - inner_zipname = os.path.join(destination, fzip) + inner_zipname = get_path(destination, fzip) zip_obj.extract(fzip, path=destination) - dest_nested = os.path.join(destination, fzip.split(".")[0]) + dest_nested = get_path(destination, fzip.split(".")[0]) with ZipFile(inner_zipname, "r") as nested_zip: nested_zip.extractall(path=dest_nested) # remove inner zip file - os.remove(inner_zipname) + pathlib.Path(inner_zipname).unlink(missing_ok=True) logger.info(f"{resource} - Successfully unzipped file '{fzip}'") except: @@ -340,7 +343,7 @@ def get_first_day_of_previous_month(date): # close and remove outer zip file zip_obj.close() - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info( f"Downloaded resource '{resource_iter}' from cloud '{url_iter}'." @@ -391,8 +394,7 @@ def download_and_unpack( True when download is successful, False otherwise """ if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -404,9 +406,9 @@ def download_and_unpack( # then unzip it and remove the original file if unzip: with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(destination) + zipfile.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") return True except: @@ -414,9 +416,9 @@ def download_and_unpack( return False -def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_direct(config, rootpath, dest_path, hot_run=True, + download_and_unzip_direct(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a direct url with no processing. @@ -426,7 +428,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -439,10 +441,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url = config["urls"]["direct"] - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) unzip = config.get("unzip", False) @@ -457,10 +459,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F def download_and_unzip_hydrobasins( - config, rootpath, hot_run=True, disable_progress=False + config, root_path, hot_run=True, disable_progress=False ): """ - download_and_unzip_basins(config, rootpath, dest_path, hot_run=True, + download_and_unzip_basins(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data for hydrobasins from HydroBASINS database @@ -480,7 +482,7 @@ def download_and_unzip_hydrobasins( ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -493,7 +495,7 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url_templ = config["urls"]["hydrobasins"]["base_url"] suffix_list = config["urls"]["hydrobasins"]["suffixes"] @@ -504,7 +506,7 @@ def download_and_unzip_hydrobasins( for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) all_downloaded &= download_and_unpack( url=url, @@ -520,9 +522,9 @@ def download_and_unzip_hydrobasins( return all_downloaded -def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_post(config, rootpath, dest_path, hot_run=True, + download_and_unzip_post(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a post request. @@ -531,7 +533,7 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -544,18 +546,17 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) # load data for post method postdata = config["urls"]["post"] # remove url feature url = postdata.pop("url") - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) # try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -571,9 +572,9 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal # then unzip it and remove the original file if config.get("unzip", False): with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(destination) + zipfile.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") # except: # logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -804,7 +805,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): gpdf_list = [None] * len(files_to_merge) logger.info("Merging hydrobasins files into: " + output_fl) for i, f_name in tqdm(enumerate(files_to_merge)): - gpdf_list[i] = gpd.read_file(os.path.join(basins_path, f_name)) + gpdf_list[i] = gpd.read_file(get_path(basins_path, f_name)) fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates( subset="HYBAS_ID", ignore_index=True ) @@ -813,7 +814,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) from _helpers import mock_snakemake snakemake = mock_snakemake("retrieve_databundle_light") @@ -822,7 +823,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): sets_path_to_root("pypsa-earth") - rootpath = os.getcwd() + root_path = get_current_directory_path() tutorial = snakemake.params.tutorial countries = snakemake.params.countries logger.info(f"Retrieving data for {len(countries)} countries.") @@ -866,7 +867,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): try: download_and_unzip = globals()[f"download_and_unzip_{host}"] if download_and_unzip( - config_bundles[b_name], rootpath, disable_progress=disable_progress + config_bundles[b_name], root_path, disable_progress=disable_progress ): downloaded_bundle = True except Exception: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 30d60e32f..48f18c4a9 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -84,7 +84,6 @@ 4. Optionally, if an integer were provided for the wildcard ``{simpl}`` (e.g. ``networks/elec_s500.nc``), the network is clustered to this number of clusters with the routines from the ``cluster_network`` rule with the function ``cluster_network.cluster(...)``. This step is usually skipped! """ -import os import sys from functools import reduce @@ -94,6 +93,7 @@ import pypsa import scipy as sp from _helpers import ( + change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, @@ -963,7 +963,7 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") configure_logging(snakemake) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index f83b47478..8a84f9499 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -77,14 +77,17 @@ for all ``scenario`` s in the configuration file the rule :mod:`solve_network`. """ -import os import re -from pathlib import Path import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, +) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -544,7 +547,7 @@ def solve_network(n, config, opts="", **kwargs): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "solve_network", simpl="", @@ -556,7 +559,7 @@ def solve_network(n, config, opts="", **kwargs): tmpdir = snakemake.params.solving.get("tmpdir") if tmpdir is not None: - Path(tmpdir).mkdir(parents=True, exist_ok=True) + build_directory(tmpdir) opts = snakemake.wildcards.opts.split("-") solve_opts = snakemake.params.solving["options"] From 56911ffd60e4d12f7ed0538e12ed5175e05f4f2f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 13:15:33 +0200 Subject: [PATCH 06/23] add unit test setup --- test/__init__.py | 4 + test/conftest.py | 21 +++++ test/test_helpers.py | 158 +++++++++++++++++++++++++++++++++++ test/test_prepare_network.py | 13 +++ 4 files changed, 196 insertions(+) create mode 100644 test/__init__.py create mode 100644 test/conftest.py create mode 100644 test/test_helpers.py create mode 100644 test/test_prepare_network.py diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..fa7a7644d --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..3ba165e42 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib + +import pytest + +_content_temp_file = "content" +_name_temp_file = "hello.txt" + + +@pytest.fixture(scope="function") +def get_temp_file(tmpdir): + p = tmpdir.join(_name_temp_file) + p.write(_content_temp_file) + yield p + pathlib.Path(p).unlink(missing_ok=True) diff --git a/test/test_helpers.py b/test/test_helpers.py new file mode 100644 index 000000000..a7981ede8 --- /dev/null +++ b/test/test_helpers.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import os +import pathlib +from test.conftest import _content_temp_file, _name_temp_file, get_temp_file + +from scripts._helpers import ( + change_to_script_dir, + get_abs_path, + get_basename_abs_path, + get_current_directory_path, + get_dirname_path, + get_path, + get_path_size, + get_relative_path, + is_directory_path, + is_file_path, + path_exists, +) + +path_cwd = str(pathlib.Path.cwd()) + + +def test_get_abs_path(): + """ + Verify the path returned by get_abs_path() + """ + abs_file = get_abs_path(__file__) + assert str(abs_file) == os.path.abspath(__file__) + assert str(abs_file) == __file__ + + +def test_change_to_script_dir(): + """ + Verify the path returned by change_to_script_dir() + """ + change_to_script_dir(__file__) + assert str(pathlib.Path.cwd()) == path_cwd + "/test" + change_to_script_dir(".") + assert str(pathlib.Path.cwd()) == path_cwd + + +def test_get_dirname_path(): + """ + Verify the path returned by get_dirname_path() + """ + dir_name_file = get_dirname_path(__file__) + dir_name_cwd = get_dirname_path(".") + assert str(dir_name_file) == os.path.dirname(__file__) + assert str(dir_name_file) == path_cwd + "/test" + assert str(dir_name_cwd) == "." + + +def test_get_basename_abs_path(): + """ + Verify the path returned by get_basename_abs_path() + """ + base_name_file = get_basename_abs_path(__file__) + assert str(base_name_file) == os.path.basename(os.path.abspath(__file__)) + assert str(base_name_file) == "test_helpers.py" + + +def test_get_path(): + """ + Verify the path returned by get_path() + """ + file_name_path_one = get_path( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + path_name_path_two = get_path( + pathlib.Path(__file__).parent, "..", "logs", "rule.log" + ) + assert str(file_name_path_one) == os.path.join( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + assert ( + str(file_name_path_one) + == path_cwd + "/sub_path_1/sub_path_2/sub_path_3/sub_path_4/sub_path_5/file.nc" + ) + assert str(path_name_path_two) == str( + pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") + ) + + +def test_get_path_size(get_temp_file): + """ + Verify the path size (in bytes) returned by get_path_size() + """ + path = get_temp_file + file_size = get_path_size(path) + assert file_size == os.stat(path).st_size + assert file_size == len(_content_temp_file) + + +def test_get_current_directory_path(): + """ + Verify the current directory path returned by get_current_directory_path() + """ + path = get_current_directory_path() + assert str(path) == os.getcwd() + + +def test_is_directory_path(tmpdir): + """ + Verify if is_directory_path() returns True when path points to directory. + """ + assert is_directory_path(tmpdir) + assert is_directory_path(tmpdir) == os.path.isdir(tmpdir) + assert not is_directory_path(__file__) + + +def test_is_file_path(get_temp_file, tmpdir): + """ + Verify if is_file_path() returns True when path points to file. + """ + path = get_temp_file + assert is_file_path(path) + assert is_file_path(path) == os.path.isfile(path) + assert not is_file_path(tmpdir) + + +def test_get_relative_path(get_temp_file): + """ + Verify the relative path returned by get_relative_path() + """ + path = get_temp_file + # path relative to the parent directory of the temp file + relative_path = get_relative_path(path, get_path(path).parent) + assert str(relative_path) == _name_temp_file + assert str(relative_path) == os.path.relpath(path, start=get_path(path).parent) + + +def test_path_exists(get_temp_file): + """ + Verify if path_exists() returns True when path exists. + """ + path = get_temp_file + pathlib_path = get_path(path) + assert path_exists(path) + assert path_exists(pathlib_path) + assert path_exists(path) == os.path.exists(path) diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py new file mode 100644 index 000000000..9bad7c220 --- /dev/null +++ b/test/test_prepare_network.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from scripts.prepare_network import download_emission_data + + +def test_download_emission_data(): + filename = download_emission_data() + assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" From 75d27a565560abccef9bb9e788cb67a6772d6e35 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 15:20:20 +0200 Subject: [PATCH 07/23] reformatting --- scripts/_helpers.py | 546 ++++++++++++++++++++++++++++++++++++++----- test/test_helpers.py | 307 ++++++++++++++++++++++++ 2 files changed, 794 insertions(+), 59 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 9d88c26b7..9a421aa39 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -8,13 +8,23 @@ import logging import os import pathlib +import shutil import subprocess import sys +import zipfile import country_converter as coco +import fiona import geopandas as gpd +import numpy as np import pandas as pd +import requests +import snakemake as sm import yaml +from pypsa.components import component_attrs, components +from pypsa.descriptors import Dict +from shapely.geometry import Point +from snakemake.script import Snakemake logger = logging.getLogger(__name__) @@ -34,21 +44,21 @@ def handle_exception(exc_type, exc_value, exc_traceback): tb = exc_traceback while tb.tb_next: tb = tb.tb_next - flname = tb.tb_frame.f_globals.get("__file__") - funcname = tb.tb_frame.f_code.co_name + fl_name = tb.tb_frame.f_globals.get("__file__") + func_name = tb.tb_frame.f_code.co_name if issubclass(exc_type, KeyboardInterrupt): logger.error( "Manual interruption %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, ) else: logger.error( "An error happened in module %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, exc_info=(exc_type, exc_value, exc_traceback), ) @@ -59,12 +69,12 @@ def create_logger(logger_name, level=logging.INFO): Create a logger for a module and adds a handler needed to capture in logs traceback from exceptions emerging during the workflow. """ - logger = logging.getLogger(logger_name) - logger.setLevel(level) + logger_instance = logging.getLogger(logger_name) + logger_instance.setLevel(level) handler = logging.StreamHandler(stream=sys.stdout) - logger.addHandler(handler) + logger_instance.addHandler(handler) sys.excepthook = handle_exception - return logger + return logger_instance def read_osm_config(*args): @@ -112,7 +122,7 @@ def read_osm_config(*args): return tuple([osm_config[a] for a in args]) -def sets_path_to_root(root_directory_name): +def sets_path_to_root(root_directory_name, n=8): """ Search and sets path to the given root directory (root/path/file). @@ -123,10 +133,8 @@ def sets_path_to_root(root_directory_name): n : int Number of folders the function will check upwards/root directed. """ - import os repo_name = root_directory_name - n = 8 # check max 8 levels above. Random default. n0 = n while n >= 0: @@ -219,36 +227,30 @@ def load_network(import_name=None, custom_components=None): from pypsa.descriptors import Dict override_components = None - override_component_attrs = None + override_component_attrs_dict = None if custom_components is not None: override_components = pypsa.components.components.copy() - override_component_attrs = Dict( + override_component_attrs_dict = Dict( {k: v.copy() for k, v in pypsa.components.component_attrs.items()} ) for k, v in custom_components.items(): override_components.loc[k] = v["component"] - override_component_attrs[k] = pd.DataFrame( + override_component_attrs_dict[k] = pd.DataFrame( columns=["type", "unit", "default", "description", "status"] ) for attr, val in v["attributes"].items(): - override_component_attrs[k].loc[attr] = val + override_component_attrs_dict[k].loc[attr] = val return pypsa.Network( import_name=import_name, override_components=override_components, - override_component_attrs=override_component_attrs, - ) - - -def pdbcast(v, h): - return pd.DataFrame( - v.values.reshape((-1, 1)) * h.values, index=v.index, columns=h.index + override_component_attrs=override_component_attrs_dict, ) def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True ): import pypsa from add_electricity import load_costs, update_transmission_costs @@ -259,7 +261,7 @@ def load_network_for_plots( n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) ) n.lines["carrier"] = "AC line" n.transformers["carrier"] = "AC transformer" @@ -284,11 +286,13 @@ def load_network_for_plots( def update_p_nom_max(n): - # if extendable carriers (solar/onwind/...) have capacity >= 0, - # e.g. existing assets from the OPSD project are included to the network, - # the installed capacity might exceed the expansion limit. - # Hence, we update the assumptions. + """ + If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. existing + assets from the OPSD project are included to the network, the installed + capacity might exceed the expansion limit. + Hence, we update the assumptions. + """ n.generators.p_nom_max = n.generators[["p_nom_min", "p_nom_max"]].max(1) @@ -330,8 +334,8 @@ def aggregate_p_curtailed(n): [ ( ( - n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt) - - n.generators_t.p.sum() + n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt) + - n.generators_t.p.sum() ) .groupby(n.generators.carrier) .sum() @@ -346,7 +350,7 @@ def aggregate_p_curtailed(n): def aggregate_costs(n, flatten=False, opts=None, existing_only=False): - components = dict( + components_dict = dict( Link=("p_nom", "p0"), Generator=("p_nom", "p"), StorageUnit=("p_nom", "p"), @@ -357,7 +361,8 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): costs = {} for c, (p_nom, p_attr) in zip( - n.iterate_components(components.keys(), skip_empty=False), components.values() + n.iterate_components(components_dict.keys(), skip_empty=False), + components_dict.values(), ): if c.df.empty: continue @@ -389,10 +394,10 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): def progress_retrieve( - url, file, data=None, headers=None, disable_progress=False, roundto=1.0 + url, file, data=None, headers=None, disable_progress=False, round_to_value=1.0 ): """ - Function to download data from a url with a progress bar progress in + Function to download data from an url with a progress bar progress in retrieving data. Parameters @@ -405,7 +410,7 @@ def progress_retrieve( Data for the request (default None), when not none Post method is used disable_progress : bool When true, no progress bar is shown - roundto : float + round_to_value : float (default 0) Precision used to report the progress e.g. 0.1 stands for 88.1, 10 stands for 90, 80 """ @@ -415,8 +420,11 @@ def progress_retrieve( pbar = tqdm(total=100, disable=disable_progress) - def dlProgress(count, blockSize, totalSize, roundto=roundto): - pbar.n = round(count * blockSize * 100 / totalSize / roundto) * roundto + def dl_progress(count, block_size, total_size): + pbar.n = ( + round(count * block_size * 100 / total_size / round_to_value) + * round_to_value + ) pbar.refresh() if data is not None: @@ -427,7 +435,7 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto): opener.addheaders = headers urllib.request.install_opener(opener) - urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data) + urllib.request.urlretrieve(url, file, reporthook=dl_progress, data=data) def get_aggregation_strategies(aggregation_strategies): @@ -455,7 +463,7 @@ def get_aggregation_strategies(aggregation_strategies): return bus_strategies, generator_strategies -def mock_snakemake(rulename, **wildcards): +def mock_snakemake(rule_name, **wildcards): """ This function is expected to be executed from the "scripts"-directory of " the snakemake project. It returns a snakemake.script.Snakemake object, @@ -465,20 +473,16 @@ def mock_snakemake(rulename, **wildcards): Parameters ---------- - rulename: str + rule_name: str name of the rule for which the snakemake object should be generated wildcards: keyword arguments fixing the wildcards. Only necessary if wildcards are needed. """ - import snakemake as sm - from pypsa.descriptors import Dict - from snakemake.script import Snakemake - script_dir = pathlib.Path(__file__).parent.resolve() assert ( - pathlib.Path.cwd().resolve() == script_dir + pathlib.Path.cwd().resolve() == script_dir ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: @@ -491,12 +495,12 @@ def mock_snakemake(rulename, **wildcards): workflow.include(snakefile) workflow.global_resources = {} try: - rule = workflow.get_rule(rulename) + rule = workflow.get_rule(rule_name) except Exception as exception: print( exception, - f"The {rulename} might be a conditional rule in the Snakefile.\n" - f"Did you enable {rulename} in the config?", + f"The {rule_name} might be a conditional rule in the Snakefile.\n" + f"Did you enable {rule_name} in the config?", ) raise dag = sm.dag.DAG(workflow, rules=[rule]) @@ -573,7 +577,9 @@ def three_2_two_digits_country(three_code_country): return two_code_country -def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_words=[]): +def two_digits_2_name_country( + two_code_country, name_string="name_short", no_comma=False, remove_start_words=[] +): """ Convert 2-digit country code to full name country: @@ -581,7 +587,10 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word ---------- two_code_country: str 2-digit country name - nocomma: bool (optional, default False) + name_string: str (optional, default name_short) + When name_short CD -> DR Congo + When name_official CD -> Democratic Republic of the Congo + no_comma: bool (optional, default False) When true, country names with comma are extended to remove the comma. Example CD -> Congo, The Democratic Republic of -> The Democratic Republic of Congo remove_start_words: list (optional, default empty) @@ -593,13 +602,15 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word full_name: str full country name """ + if remove_start_words is None: + remove_start_words = list() if two_code_country == "SN-GM": return f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" - full_name = coco.convert(two_code_country, to="name_short") + full_name = coco.convert(two_code_country, to=name_string) - if nocomma: - # separate list by delim + if no_comma: + # separate list by delimiter splits = full_name.split(", ") # reverse the order @@ -608,7 +619,7 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word # return the merged string full_name = " ".join(splits) - # when list is non empty + # when list is non-empty if remove_start_words: # loop over every provided word for word in remove_start_words: @@ -634,8 +645,8 @@ def country_name_2_two_digits(country_name): 2-digit country name """ if ( - country_name - == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" + country_name + == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" ): return "SN-GM" @@ -744,7 +755,7 @@ def filter_codes(c_list, iso_coding=True): selected(iso_coding=False), ignore iso-specific names. """ if ( - iso_coding + iso_coding ): # if country lists are in iso coding, then check if they are 2-string # 2-code countries ret_list = [c for c in c_list if len(c) == 2] @@ -866,6 +877,7 @@ def get_path_size(path): def build_directory(path): """ It creates recursively the directory and its leaf directories. + Parameters: path (str): The path to the file """ @@ -881,6 +893,7 @@ def change_to_script_dir(path): """ Change the current working directory to the directory containing the given script. + Parameters: path (str): The path to the file. """ @@ -902,6 +915,7 @@ def get_current_directory_path(): def is_directory_path(path): """ It returns True if the path points to a directory. + False otherwise. """ return pathlib.Path(path).is_dir() @@ -910,6 +924,7 @@ def is_directory_path(path): def is_file_path(path): """ It returns True if the path points to a file. + False otherwise. """ return pathlib.Path(path).is_file() @@ -918,6 +933,7 @@ def is_file_path(path): def get_relative_path(path, start_path="."): """ It returns a relative path to path from start_path. + Default for start_path is the current directory """ return pathlib.Path(path).relative_to(start_path) @@ -926,6 +942,418 @@ def get_relative_path(path, start_path="."): def path_exists(path): """ It returns True if the path exists. + False otherwise. """ return pathlib.Path(path).exists() + + +def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): + """ + Create a network topology like the power transmission network. + + Parameters + ---------- + n : pypsa.Network + prefix : str + connector : str + bidirectional : bool, default True + True: one link for each connection + False: one link for each connection and direction (back and forth) + + Returns + ------- + pd.DataFrame with columns bus0, bus1 and length + """ + + ln_attrs = ["bus0", "bus1", "length"] + lk_attrs = ["bus0", "bus1", "length", "underwater_fraction"] + + # TODO: temporary fix for when underwater_fraction is not found + if "underwater_fraction" not in n.links.columns: + if n.links.empty: + n.links["underwater_fraction"] = None + else: + n.links["underwater_fraction"] = 0.0 + + candidates = pd.concat( + [n.lines[ln_attrs], n.links.loc[n.links.carrier == "DC", lk_attrs]] + ).fillna(0) + + positive_order = candidates.bus0 < candidates.bus1 + candidates_p = candidates[positive_order] + swap_buses = {"bus0": "bus1", "bus1": "bus0"} + candidates_n = candidates[~positive_order].rename(columns=swap_buses) + candidates = pd.concat([candidates_p, candidates_n]) + + def make_index(c): + return prefix + c.bus0 + connector + c.bus1 + + topo = candidates.groupby(["bus0", "bus1"], as_index=False).mean() + topo.index = topo.apply(make_index, axis=1) + + if not bidirectional: + topo_reverse = topo.copy() + topo_reverse.rename(columns=swap_buses, inplace=True) + topo_reverse.index = topo_reverse.apply(make_index, axis=1) + topo = pd.concat([topo, topo_reverse]) + + return topo + + +def cycling_shift(df, steps=1): + """ + Cyclic shift on index of pd.Series|pd.DataFrame by number of steps. + """ + df = df.copy() + new_index = np.roll(df.index, steps) + df.values[:] = df.reindex(index=new_index).values + return df + + +def download_gadm(country_code, update=False, out_logging=False): + """ + Download gpkg file from GADM for a given country code. + + Parameters + ---------- + country_code : str + Two letter country codes of the downloaded files + update : bool + Update = true, forces re-download of files + + Returns + ------- + gpkg file per country + """ + + gadm_filename = f"gadm36_{two_2_three_digits_country(country_code)}" + gadm_url = f"https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/{gadm_filename}_gpkg.zip" + _logger = logging.getLogger(__name__) + gadm_input_file_zip = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".zip", + ) # Input filepath zip + + gadm_input_file_gpkg = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".gpkg", + ) # Input filepath gpkg + + if not path_exists(gadm_input_file_gpkg) or update is True: + if out_logging: + _logger.warning( + f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" + ) + # create data/osm directory + os.makedirs(os.path.dirname(gadm_input_file_zip), exist_ok=True) + + with requests.get(gadm_url, stream=True) as r: + with open(gadm_input_file_zip, "wb") as f: + shutil.copyfileobj(r.raw, f) + + with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: + zip_ref.extractall(os.path.dirname(gadm_input_file_zip)) + + return gadm_input_file_gpkg, gadm_filename + + +def get_gadm_layer(country_list, layer_id, update=False, outlogging=False): + """ + Function to retrieve a specific layer id of a geopackage for a selection of + countries. + + Parameters + ---------- + country_list : str + List of the countries + layer_id : int + Layer to consider in the format GID_{layer_id}. + When the requested layer_id is greater than the last available layer, then the last layer is selected. + When a negative value is requested, then, the last layer is requested + """ + # initialization of the list of geodataframes + geodf_list = [] + + for country_code in country_list: + # download file gpkg + file_gpkg, name_file = download_gadm(country_code, update, outlogging) + + # get layers of a geopackage + list_layers = fiona.listlayers(file_gpkg) + + # get layer name + if layer_id < 0 | layer_id >= len(list_layers): + # when layer id is negative or larger than the number of layers, select the last layer + layer_id = len(list_layers) - 1 + code_layer = np.mod(layer_id, len(list_layers)) + layer_name = ( + f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}" + ) + + # read gpkg file + geodf_temp = gpd.read_file(file_gpkg, layer=layer_name) + + # convert country name representation of the main country (GID_0 column) + geodf_temp["GID_0"] = [ + three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] + ] + + # create a subindex column that is useful + # in the GADM processing of sub-national zones + geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"] + + # concatenate geodataframes + geodf_list = pd.concat([geodf_list, geodf_temp]) + + geodf_gadm = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) + geodf_gadm.set_crs(geodf_list[0].crs, inplace=True) + + return geodf_gadm + + +def locate_bus( + coords, + co, + gadm_level, + path_to_gadm=None, + gadm_clustering=False, +): + """ + Function to locate the right node for a coordinate set input coords of + point. + + Parameters + ---------- + coords: pandas dataseries + dataseries with 2 rows x & y representing the longitude and latitude + co: string (code for country where coords are MA Morocco) + code of the countries where the coordinates are + """ + col = "name" + if not gadm_clustering: + gdf = gpd.read_file(path_to_gadm) + else: + if path_to_gadm: + gdf = gpd.read_file(path_to_gadm) + if "GADM_ID" in gdf.columns: + col = "GADM_ID" + + if gdf[col][0][ + :3 + ].isalpha(): # TODO clean later by changing all codes to 2 letters + gdf[col] = gdf[col].apply( + lambda name: three_2_two_digits_country(name[:3]) + name[3:] + ) + else: + gdf = get_gadm_layer(co, gadm_level) + col = "GID_{}".format(gadm_level) + + # gdf.set_index("GADM_ID", inplace=True) + gdf_co = gdf[ + gdf[col].str.contains(co) + ] # geodataframe of entire continent - output of prev function {} are placeholders + # in strings - conditional formatting + # insert any variable into that place using .format - extract string and filter for those containing co (MA) + point = Point(coords["x"], coords["y"]) # point object + + try: + return gdf_co[gdf_co.contains(point)][ + col + ].item() # filter gdf_co which contains point and returns the bus + + except ValueError: + return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][ + col + ].item() # looks for closest one shape=node + + +def override_component_attrs(directory): + """Tell PyPSA that links can have multiple outputs by + overriding the component_attrs. This can be done for + as many buses as you need with format busi for i = 2,3,4,5,.... + See https://pypsa.org/doc/components.html#link-with-multiple-outputs-or-inputs + + Parameters + ---------- + directory : string + Folder where component attributes to override are stored + analogous to ``pypsa/component_attrs``, e.g. `links.csv`. + + Returns + ------- + Dictionary of overridden component attributes. + """ + + attrs = Dict({k: v.copy() for k, v in component_attrs.items()}) + + for component, list_name in components.list_name.items(): + fn = f"{directory}/{list_name}.csv" + if os.path.isfile(fn): + overrides = pd.read_csv(fn, index_col=0, na_values="n/a") + attrs[component] = overrides.combine_first(attrs[component]) + + return attrs + + +def get_conv_factors(sector): + """ + Create a dictionary with all the conversion factors for the standard net calorific value + from Tera Joule per Kilo Metric-ton to Tera Watt-hour based on + https://unstats.un.org/unsd/energy/balance/2014/05.pdf. + + Considering that 1 Watt-hour = 3600 Joule, one obtains the values below dividing + the standard net calorific values from the pdf by 3600. + + For example, the value "hard coal": 0.007167 is given by 25.8 / 3600, where 25.8 is the standard + net calorific value. + """ + + conversion_factors_dict = { + "additives and oxygenates": 0.008333, + "anthracite": 0.005, + "aviation gasoline": 0.01230, + "bagasse": 0.002144, + "biodiesel": 0.01022, + "biogasoline": 0.007444, + "bio jet kerosene": 0.011111, + "bitumen": 0.01117, + "brown coal": 0.003889, + "brown coal briquettes": 0.00575, + "charcoal": 0.00819, + "coal tar": 0.007778, + "coke-oven coke": 0.0078334, + "coke-oven gas": 0.000277, + "coking coal": 0.007833, + "conventional crude oil": 0.01175, + "crude petroleum": 0.011750, + "ethane": 0.01289, + "fuel oil": 0.01122, + "fuelwood": 0.00254, + "gas coke": 0.007326, + "gas oil/ diesel oil": 0.01194, + "gasoline-type jet fuel": 0.01230, + "hard coal": 0.007167, + "kerosene-type jet fuel": 0.01225, + "lignite": 0.003889, + "liquefied petroleum gas (lpg)": 0.01313, + "lubricants": 0.011166, + "motor gasoline": 0.01230, + "naphtha": 0.01236, + "natural gas": 0.00025, + "natural gas liquids": 0.01228, + "oil shale": 0.00247, + "other bituminous coal": 0.005556, + "paraffin waxes": 0.01117, + "patent fuel": 0.00575, + "peat": 0.00271, + "peat products": 0.00271, + "petroleum coke": 0.009028, + "refinery gas": 0.01375, + "sub-bituminous coal": 0.005555, + } + + if sector == "industry": + return conversion_factors_dict + else: + logger.info(f"No conversion factors available for sector {sector}") + return np.nan + + +def aggregate_fuels(sector): + gas_fuels = [ + "blast furnace gas", + "natural gas (including lng)", + "natural gas liquids", + ] + + oil_fuels = [ + "bitumen", + "conventional crude oil", + "crude petroleum", + "ethane", + "fuel oil", + "gas oil/ diesel oil", + "kerosene-type jet fuel", + "liquefied petroleum gas (lpg)", + "lubricants", + "motor gasoline", + "naphtha", + "patent fuel", + "petroleum coke", + "refinery gas", + ] + + coal_fuels = [ + "anthracite", + "brown coal", + "brown coal briquettes", + "coke-oven coke", + "coke-oven gas", + "coking coal", + "gas coke", + "gasworks gas", + "hard coal", + "lignite", + "other bituminous coal", + "peat", + "peat products", + "sub-bituminous coal", + ] + + biomass_fuels = [ + "bagasse", + "fuelwood", + "biogases", + "biogasoline", + "biodiesel", + "charcoal", + "black liquor", + ] + + electricity = ["electricity"] + + heat = ["heat", "direct use of geothermal heat", "direct use of solar thermal heat"] + + if sector == "industry": + return gas_fuels, oil_fuels, biomass_fuels, coal_fuels, heat, electricity + else: + logger.info(f"No fuels available for sector {sector}") + return np.nan + + +def modify_commodity(commodity): + if commodity.strip() == "Hrad coal": + commodity = "Hard coal" + elif commodity.strip().casefold() == "coke oven gas": + commodity = "Coke-oven gas" + elif commodity.strip().casefold() == "coke oven coke": + commodity = "Coke-oven coke" + elif commodity.strip() == "Liquified Petroleum Gas (LPG)": + commodity = "Liquefied Petroleum Gas (LPG)" + elif commodity.strip() == "Gas Oil/Diesel Oil": + commodity = "Gas Oil/ Diesel Oil" + elif commodity.strip() == "Lignite brown coal- recoverable resources": + commodity = "Lignite brown coal - recoverable resources" + return commodity.strip().casefold() + + +def safe_divide(numerator, denominator): + """ + Safe division function that returns NaN when the denominator is zero. + """ + if denominator != 0.0: + return numerator / denominator + else: + logging.warning( + f"Division by zero: {numerator} / {denominator}, returning NaN." + ) + return np.nan diff --git a/test/test_helpers.py b/test/test_helpers.py index a7981ede8..15bba3a5d 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -9,10 +9,16 @@ import pathlib from test.conftest import _content_temp_file, _name_temp_file, get_temp_file +import numpy as np +import pandas as pd + from scripts._helpers import ( + aggregate_fuels, change_to_script_dir, + country_name_2_two_digits, get_abs_path, get_basename_abs_path, + get_conv_factors, get_current_directory_path, get_dirname_path, get_path, @@ -20,12 +26,164 @@ get_relative_path, is_directory_path, is_file_path, + modify_commodity, path_exists, + safe_divide, + sets_path_to_root, + three_2_two_digits_country, + two_2_three_digits_country, + two_digits_2_name_country, ) path_cwd = str(pathlib.Path.cwd()) +original_commodity_data = [ + "Biogases", + "Fuelwood", + "of which: fishing", + "Natural gas liquids", + "Naphtha", + "Motor Gasoline", + "Motor gasoline", + "Gasoline-type jet fuel", + "Peat products", + "Peat Products", + "Direct use of geothermal heat", + "Additives and Oxygenates", + "Electricity", + "Animal waste", + "animal waste", + "Refinery gas", + "Refinery Gas", + "Fuel oil", + "Oil shale", + "Oil Shale", + "Lignite", + "Falling water", + "Petroleum coke", + "Petroleum Coke", + "Aviation gasoline", + "Ethane", + "Natural gas (including LNG)", + "Natural gas", + "Natural Gas (including LNG)", + "Other bituminous coal", + "Paraffin waxes", + "Hard coal", + "Coal", + "Hrad coal", + "Coke Oven Gas", + "Gasworks Gas", + "Brown coal briquettes", + "Brown Coal Briquettes", + "Liquefied petroleum gas (LPG)", + "Liquified Petroleum Gas (LPG)", + "Sub-bituminous coal", + "Kerosene-type Jet Fuel", + "Charcoal", + "Heat", + "Gas coke", + "Gas Coke", + "Patent fuel", + "Peat (for fuel use)", + "Peat", + "Coal Tar", + "Biogasoline", + "Coking coal", + "Electricity generating capacity", + "Anthracite", + "Coke oven coke", + "Coke-oven coke", + "Coke Oven Coke", + "Conventional crude oil", + "Crude petroleum", + "Brown coal", + "Lignite brown coal", + "Lignite brown coal- recoverable resources", + "Biodiesel", + "Lubricants", + "Black Liquor", + "Gas Oil/ Diesel Oil", + "Gas Oil/ Diesel Oil ", + "Gas Oil/Diesel Oil", + "Bagasse", + "Direct use of solar thermal heat", + "Bio jet kerosene", + "Blast Furnace Gas", + "Blast furnace gas", + "Bitumen", +] + +modified_commodity_data = [ + "biogases", + "fuelwood", + "of which: fishing", + "natural gas liquids", + "naphtha", + "motor gasoline", + "gasoline-type jet fuel", + "peat products", + "direct use of geothermal heat", + "additives and oxygenates", + "electricity", + "animal waste", + "refinery gas", + "fuel oil", + "oil shale", + "lignite", + "falling water", + "petroleum coke", + "aviation gasoline", + "ethane", + "natural gas (including lng)", + "natural gas", + "other bituminous coal", + "paraffin waxes", + "hard coal", + "coal", + "coke-oven gas", + "gasworks gas", + "brown coal briquettes", + "liquefied petroleum gas (lpg)", + "sub-bituminous coal", + "kerosene-type jet fuel", + "charcoal", + "heat", + "gas coke", + "patent fuel", + "peat (for fuel use)", + "peat", + "coal tar", + "biogasoline", + "coking coal", + "electricity generating capacity", + "anthracite", + "coke-oven coke", + "conventional crude oil", + "crude petroleum", + "brown coal", + "lignite brown coal", + "lignite brown coal - recoverable resources", + "biodiesel", + "lubricants", + "black liquor", + "gas oil/ diesel oil", + "bagasse", + "direct use of solar thermal heat", + "bio jet kerosene", + "blast furnace gas", + "bitumen", +] + +original_commodity_dataframe = pd.DataFrame( + original_commodity_data, columns=["Commodity"] +) +modified_commodity_dataframe = pd.DataFrame( + modified_commodity_data, columns=["Commodity"] +) + + def test_get_abs_path(): """ Verify the path returned by get_abs_path() @@ -156,3 +314,152 @@ def test_path_exists(get_temp_file): assert path_exists(path) assert path_exists(pathlib_path) assert path_exists(path) == os.path.exists(path) + + +def test_two_2_three_digits_country(): + """ + Verify the conversion from two-digit to three-digit country code. + """ + # Afghanistan + assert two_2_three_digits_country("AF") == "AFG" + # American Samoa + assert two_2_three_digits_country("AS") == "ASM" + # Aruba + assert two_2_three_digits_country("AW") == "ABW" + # Germany + assert two_2_three_digits_country("DE") == "DEU" + # Micronesia (Federated States of) + assert two_2_three_digits_country("FM") == "FSM" + + +def test_three_2_two_digits_country(): + """ + Verify the conversion from three-digit to two-digit country code. + """ + # Afghanistan + assert "AF" == three_2_two_digits_country("AFG") + # American Samoa + assert "AS" == three_2_two_digits_country("ASM") + # Aruba + assert "AW" == three_2_two_digits_country("ABW") + # Germany + assert "DE" == three_2_two_digits_country("DEU") + # Micronesia (Federated States of) + assert "FM" == three_2_two_digits_country("FSM") + + +def test_two_digits_2_name_country(): + """ + Verify the conversion from two-digit country code to country name. + """ + # Micronesia (Federated States of) + assert "Micronesia, Fed. Sts." == two_digits_2_name_country("FM") + assert "Federated States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official" + ) + assert "States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official", remove_start_words=["Federated "] + ) + # Democratic Republic of the Congo + assert "DR Congo" == two_digits_2_name_country("CD") + assert "Democratic Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official" + ) + assert "Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official", remove_start_words=["Democratic "] + ) + + +def test_country_name_2_two_digits(): + """ + Verify the conversion from country name to two-digit country code. + """ + # Afghanistan + assert "AF" == country_name_2_two_digits("Afghanistan") + # American Samoa + assert "AS" == country_name_2_two_digits("American Samoa") + # Aruba + assert "AW" == country_name_2_two_digits("Aruba") + # Germany + assert "DE" == country_name_2_two_digits("Germany") + # Micronesia (Federated States of) + assert "FM" == country_name_2_two_digits("Micronesia") + + +def test_safe_divide(): + """ + Verify that the method safe_divide prevents divisions by vanishing + denominator. + """ + assert safe_divide(3.0, 2.0) == 1.5 + assert np.isnan(safe_divide(3.0, 0.0)) + + +def test_get_conv_factors(): + """ + Verify that the conversion factors returned by get_conv_factors are + correct. + """ + conversion_factors_dict = get_conv_factors("industry") + assert conversion_factors_dict["additives and oxygenates"] == 0.008333 + assert conversion_factors_dict["anthracite"] == 0.005 + assert conversion_factors_dict["aviation gasoline"] == 0.01230 + assert conversion_factors_dict["bagasse"] == 0.002144 + assert conversion_factors_dict["biodiesel"] == 0.01022 + assert conversion_factors_dict["biogasoline"] == 0.007444 + assert conversion_factors_dict["bio jet kerosene"] == 0.011111 + assert conversion_factors_dict["bitumen"] == 0.01117 + assert conversion_factors_dict["brown coal"] == 0.003889 + assert conversion_factors_dict["brown coal briquettes"] == 0.00575 + assert conversion_factors_dict["charcoal"] == 0.00819 + assert conversion_factors_dict["coal tar"] == 0.007778 + assert conversion_factors_dict["coke-oven coke"] == 0.0078334 + assert conversion_factors_dict["coke-oven gas"] == 0.000277 + assert conversion_factors_dict["coking coal"] == 0.007833 + assert conversion_factors_dict["conventional crude oil"] == 0.01175 + assert conversion_factors_dict["crude petroleum"] == 0.011750 + assert conversion_factors_dict["ethane"] == 0.01289 + assert conversion_factors_dict["fuel oil"] == 0.01122 + assert conversion_factors_dict["fuelwood"] == 0.00254 + assert conversion_factors_dict["gas coke"] == 0.007326 + assert conversion_factors_dict["gas oil/ diesel oil"] == 0.01194 + assert conversion_factors_dict["gasoline-type jet fuel"] == 0.01230 + assert conversion_factors_dict["hard coal"] == 0.007167 + assert conversion_factors_dict["kerosene-type jet fuel"] == 0.01225 + assert conversion_factors_dict["lignite"] == 0.003889 + assert conversion_factors_dict["liquefied petroleum gas (lpg)"] == 0.01313 + assert conversion_factors_dict["lubricants"] == 0.011166 + assert conversion_factors_dict["motor gasoline"] == 0.01230 + assert conversion_factors_dict["naphtha"] == 0.01236 + assert conversion_factors_dict["natural gas liquids"] == 0.01228 + assert conversion_factors_dict["oil shale"] == 0.00247 + assert conversion_factors_dict["other bituminous coal"] == 0.005556 + assert conversion_factors_dict["paraffin waxes"] == 0.01117 + assert conversion_factors_dict["patent fuel"] == 0.00575 + assert conversion_factors_dict["peat"] == 0.00271 + assert conversion_factors_dict["peat products"] == 0.00271 + assert conversion_factors_dict["petroleum coke"] == 0.009028 + assert conversion_factors_dict["refinery gas"] == 0.01375 + assert conversion_factors_dict["sub-bituminous coal"] == 0.005555 + assert np.isnan(get_conv_factors("non-industry")) + + +def test_modify_commodity(): + """ + Verify that modify_commodity returns the commodities in wished format. + """ + new_commodity_dataframe = pd.DataFrame() + new_commodity_dataframe["Commodity"] = ( + original_commodity_dataframe["Commodity"].map(modify_commodity).unique() + ) + df = new_commodity_dataframe.compare(modified_commodity_dataframe) + boolean_flag = df.empty + if not boolean_flag: + assert False + + +def test_aggregate_fuels(): + """ + Verify what is returned by aggregate_fuels. + """ + assert np.isnan(aggregate_fuels("non-industry")) From 0c9dd21d019534447288b148ee4bd9fe988d3021 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 17:19:09 +0200 Subject: [PATCH 08/23] change to _helpers.py methods --- scripts/_helpers.py | 15 ++++++++------- scripts/make_summary.py | 2 +- scripts/solve_network.py | 2 +- test/test_helpers.py | 7 ++++++- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 9a421aa39..a9b4afe07 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -874,19 +874,20 @@ def get_path_size(path): return pathlib.Path(path).stat().st_size -def build_directory(path): +def build_directory(path, just_parent_directory=True): """ It creates recursively the directory and its leaf directories. Parameters: path (str): The path to the file + just_parent_directory (Boolean) : it creates just the parent directory """ # Check if the provided path points to a directory - if is_directory_path(path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - else: + if just_parent_directory: pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True) + else: + pathlib.Path(path).mkdir(parents=True, exist_ok=True) def change_to_script_dir(path): @@ -1054,14 +1055,14 @@ def download_gadm(country_code, update=False, out_logging=False): f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" ) # create data/osm directory - os.makedirs(os.path.dirname(gadm_input_file_zip), exist_ok=True) + build_directory(gadm_input_file_zip) with requests.get(gadm_url, stream=True) as r: with open(gadm_input_file_zip, "wb") as f: shutil.copyfileobj(r.raw, f) with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: - zip_ref.extractall(os.path.dirname(gadm_input_file_zip)) + zip_ref.extractall(get_dirname_path(gadm_input_file_zip)) return gadm_input_file_gpkg, gadm_filename @@ -1197,7 +1198,7 @@ def override_component_attrs(directory): for component, list_name in components.list_name.items(): fn = f"{directory}/{list_name}.csv" - if os.path.isfile(fn): + if is_file_path(fn): overrides = pd.read_csv(fn, index_col=0, na_values="n/a") attrs[component] = overrides.combine_first(attrs[component]) diff --git a/scripts/make_summary.py b/scripts/make_summary.py index c74938027..7bc3aa86c 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -532,7 +532,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all def to_csv(dfs, dir): - build_directory(dir) + build_directory(dir, just_parent_directory=False) for key, df in dfs.items(): df.to_csv(get_path(dir, f"{key}.csv")) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 8a84f9499..6f7dfdcdb 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -559,7 +559,7 @@ def solve_network(n, config, opts="", **kwargs): tmpdir = snakemake.params.solving.get("tmpdir") if tmpdir is not None: - build_directory(tmpdir) + build_directory(tmpdir, just_parent_directory=False) opts = snakemake.wildcards.opts.split("-") solve_opts = snakemake.params.solving["options"] diff --git a/test/test_helpers.py b/test/test_helpers.py index 15bba3a5d..c9c176292 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -14,6 +14,7 @@ from scripts._helpers import ( aggregate_fuels, + build_directory, change_to_script_dir, country_name_2_two_digits, get_abs_path, @@ -29,7 +30,6 @@ modify_commodity, path_exists, safe_divide, - sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, two_digits_2_name_country, @@ -184,6 +184,11 @@ ) +def test_build_directory(tmpdir): + + build_directory(tmpdir) + + def test_get_abs_path(): """ Verify the path returned by get_abs_path() From c64be540e56330c405cf0b0dde4645e7221fd4da Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 19:38:57 +0200 Subject: [PATCH 09/23] add new unit test for build_directory --- scripts/_helpers.py | 4 +++- test/conftest.py | 11 +++++++++++ test/test_helpers.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index a9b4afe07..403a359fc 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -880,7 +880,9 @@ def build_directory(path, just_parent_directory=True): Parameters: path (str): The path to the file - just_parent_directory (Boolean) : it creates just the parent directory + just_parent_directory (Boolean): given a path dir/subdir + True: it creates just the parent directory dir + False: it creates the full directory tree dir/subdir """ # Check if the provided path points to a directory diff --git a/test/conftest.py b/test/conftest.py index 3ba165e42..fa8cbd171 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -6,11 +6,14 @@ # -*- coding: utf-8 -*- import pathlib +import shutil import pytest _content_temp_file = "content" _name_temp_file = "hello.txt" +_temp_content_dir = "temp_content_dir" +_sub_temp_content_dir = "sub_temp_content_dir" @pytest.fixture(scope="function") @@ -19,3 +22,11 @@ def get_temp_file(tmpdir): p.write(_content_temp_file) yield p pathlib.Path(p).unlink(missing_ok=True) + + +@pytest.fixture(scope="function") +def get_temp_folder(tmpdir): + temp_content_dir = tmpdir.join(_temp_content_dir) + sub_temp_content_dir = temp_content_dir.join(_sub_temp_content_dir) + yield sub_temp_content_dir + shutil.rmtree(str(sub_temp_content_dir)) diff --git a/test/test_helpers.py b/test/test_helpers.py index c9c176292..c72a02434 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -7,7 +7,13 @@ import os import pathlib -from test.conftest import _content_temp_file, _name_temp_file, get_temp_file +from test.conftest import ( + _content_temp_file, + _name_temp_file, + _sub_temp_content_dir, + _temp_content_dir, + get_temp_file, +) import numpy as np import pandas as pd @@ -19,6 +25,7 @@ country_name_2_two_digits, get_abs_path, get_basename_abs_path, + get_basename_path, get_conv_factors, get_current_directory_path, get_dirname_path, @@ -184,9 +191,39 @@ ) -def test_build_directory(tmpdir): +def test_build_directory(get_temp_folder, tmpdir): + """ + Verify the directory tree returned by build_directory() + """ + + # build_directory(path, just_parent_directory=True) is + # equivalent to os.makedirs(os.path.dirname(path), exist_ok=True) + # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir + # it will create just tmpdir/temp_content_dir/ + build_directory(get_temp_folder, just_parent_directory=True) + just_parent_list = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list.append(str(get_path(root))) - build_directory(tmpdir) + assert len(just_parent_list) == 2 + assert just_parent_list[0] == str(tmpdir) + assert just_parent_list[1] == str(tmpdir.join(_temp_content_dir)) + + # build_directory(path, just_parent_directory=False) is + # equivalent to os.makedirs(path, exist_ok=True) + # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir + # it will create the full path tmpdir/temp_content_dir/sub_temp_content_dir + build_directory(get_temp_folder, just_parent_directory=False) + full_tree_list = [] + for root, dirs, files in os.walk(tmpdir): + full_tree_list.append(str(get_path(root))) + + assert len(full_tree_list) == 3 + assert full_tree_list[0] == str(tmpdir) + assert full_tree_list[1] == str(tmpdir.join(_temp_content_dir)) + assert full_tree_list[2] == str( + tmpdir.join(_temp_content_dir, _sub_temp_content_dir) + ) def test_get_abs_path(): From d6f7d9970285dbad89d032971858f90d5d5170cc Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 10:57:36 +0200 Subject: [PATCH 10/23] modify unit test build_directory --- test/test_helpers.py | 67 +++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/test/test_helpers.py b/test/test_helpers.py index c72a02434..d704ac492 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -7,6 +7,7 @@ import os import pathlib +import shutil from test.conftest import ( _content_temp_file, _name_temp_file, @@ -194,37 +195,65 @@ def test_build_directory(get_temp_folder, tmpdir): """ Verify the directory tree returned by build_directory() + + Please note: + -) build_directory(path, just_parent_directory=True) is equivalent to os.makedirs(os.path.dirname(path)). + Given a path tmpdir/temp_content_dir/sub_temp_content_dir, it will create just tmpdir/temp_content_dir/ + -) build_directory(path, just_parent_directory=False) is equivalent to os.makedirs(path). Given a path + tmpdir/temp_content_dir/sub_temp_content_dir, it will create tmpdir/temp_content_dir/sub_temp_content_dir """ - # build_directory(path, just_parent_directory=True) is - # equivalent to os.makedirs(os.path.dirname(path), exist_ok=True) - # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir - # it will create just tmpdir/temp_content_dir/ + # test with pathlib build_directory(get_temp_folder, just_parent_directory=True) - just_parent_list = [] + just_parent_list_pathlib = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list_pathlib.append(str(get_path(root))) + + assert len(just_parent_list_pathlib) == 2 + assert just_parent_list_pathlib[0] == str(tmpdir) + assert just_parent_list_pathlib[1] == str(tmpdir.join(_temp_content_dir)) + + # remove the temporary folder tmpdir/temp_content_dir/ + shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir)) + + # test with os.makedirs. Please note for exist_ok=False, + # a FileExistsError is raised if the target directory + # already exists. Hence, setting exist_ok=False ensures + # that the removal with shutil.rmtree was successful + os.makedirs(os.path.dirname(get_temp_folder), exist_ok=False) + just_parent_list_os = [] for root, dirs, files in os.walk(tmpdir): - just_parent_list.append(str(get_path(root))) + just_parent_list_os.append(str(get_path(root))) - assert len(just_parent_list) == 2 - assert just_parent_list[0] == str(tmpdir) - assert just_parent_list[1] == str(tmpdir.join(_temp_content_dir)) + assert just_parent_list_pathlib == just_parent_list_os - # build_directory(path, just_parent_directory=False) is - # equivalent to os.makedirs(path, exist_ok=True) - # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir - # it will create the full path tmpdir/temp_content_dir/sub_temp_content_dir + # test with pathlib build_directory(get_temp_folder, just_parent_directory=False) - full_tree_list = [] + full_tree_list_pathlib = [] for root, dirs, files in os.walk(tmpdir): - full_tree_list.append(str(get_path(root))) + full_tree_list_pathlib.append(str(get_path(root))) - assert len(full_tree_list) == 3 - assert full_tree_list[0] == str(tmpdir) - assert full_tree_list[1] == str(tmpdir.join(_temp_content_dir)) - assert full_tree_list[2] == str( + assert len(full_tree_list_pathlib) == 3 + assert full_tree_list_pathlib[0] == str(tmpdir) + assert full_tree_list_pathlib[1] == str(tmpdir.join(_temp_content_dir)) + assert full_tree_list_pathlib[2] == str( tmpdir.join(_temp_content_dir, _sub_temp_content_dir) ) + # remove the temporary folder tmpdir/temp_content_dir/* + shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir)) + + # test with os.makedirs. Please note for exist_ok=False, + # a FileExistsError is raised if the target directory + # already exists. Hence, setting exist_ok=False ensures + # that the removal with shutil.rmtree was successful + os.makedirs(get_temp_folder, exist_ok=False) + full_tree_list_os = [] + for root, dirs, files in os.walk(tmpdir): + full_tree_list_os.append(str(get_path(root))) + + assert full_tree_list_os == full_tree_list_pathlib + def test_get_abs_path(): """ From 2fdd90ecd78778137ff7f289d1abc9a2a022c808 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 11:27:58 +0200 Subject: [PATCH 11/23] comment out test_prepare_network --- .github/workflows/ci-unit-test.yaml | 0 test/test_helpers.py | 1 - test/test_prepare_network.py | 10 ++++------ 3 files changed, 4 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/ci-unit-test.yaml diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_helpers.py b/test/test_helpers.py index d704ac492..6b68b0906 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -26,7 +26,6 @@ country_name_2_two_digits, get_abs_path, get_basename_abs_path, - get_basename_path, get_conv_factors, get_current_directory_path, get_dirname_path, diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 9bad7c220..3181b444e 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,9 +5,7 @@ # -*- coding: utf-8 -*- -from scripts.prepare_network import download_emission_data - - -def test_download_emission_data(): - filename = download_emission_data() - assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" +# from scripts.prepare_network import download_emission_data +# def test_download_emission_data(): +# filename = download_emission_data() +# assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" From 1498ce4514c8c327b971801be49bd4050ed6b664 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 11:47:57 +0200 Subject: [PATCH 12/23] add ci-unit-test.yaml --- .github/workflows/ci-unit-test.yaml | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml index e69de29bb..71796061d 100644 --- a/.github/workflows/ci-unit-test.yaml +++ b/.github/workflows/ci-unit-test.yaml @@ -0,0 +1,55 @@ +name: CI-unit-test + +on: + push: + branches: + - main + pull_request: + branches: + - main + schedule: + - cron: "0 5 * * TUE" + +jobs: + + test-with-pypi: + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: + - 3.9 + - "3.10" + - "3.11" + - "3.12" + os: + - ubuntu-latest + - macos-latest + - windows-latest + env: + MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 + + steps: + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + pip install .[dev] + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + + - name: Test with pytest + run: | + pytest test/ From b5158f8d42b19d85080c8a55c8b6286db3fac147 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 13:44:24 +0200 Subject: [PATCH 13/23] modify ci-unit-test.yaml --- .github/workflows/ci-unit-test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml index 71796061d..abb291580 100644 --- a/.github/workflows/ci-unit-test.yaml +++ b/.github/workflows/ci-unit-test.yaml @@ -43,7 +43,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest - pip install .[dev] - name: Lint with flake8 run: | From a3bf6cc565f9dc6f9301dec59b9540befb40f910 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 13:50:07 +0200 Subject: [PATCH 14/23] remove match statement from monte_carlo.py --- scripts/monte_carlo.py | 49 ++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 22f8874f8..3eadb00ca 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -237,29 +237,32 @@ def rescale_distribution( dist = value.get("type") params = value.get("args") - match dist: - case "uniform": - l_bounds, u_bounds = params - latin_hypercube[:, idx] = minmax_scale( - latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) - ) - case "normal": - mean, std = params - latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) - case "lognormal": - shape = params[0] - latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) - case "triangle": - mid_point = params[0] - latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) - case "beta": - a, b = params - latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) - case "gamma": - shape, scale = params - latin_hypercube[:, idx] = gamma.ppf( - latin_hypercube[:, idx], shape, scale - ) + if dist == "uniform": + l_bounds, u_bounds = params + latin_hypercube[:, idx] = minmax_scale( + latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) + ) + elif dist == "normal": + mean, std = params + latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) + elif dist == "lognormal": + shape = params[0] + latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) + elif dist == "triangle": + mid_point = params[0] + latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) + elif dist == "beta": + a, b = params + latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) + elif dist == "gamma": + shape, scale = params + latin_hypercube[:, idx] = gamma.ppf(latin_hypercube[:, idx], shape, scale) + else: + exception_message = ( + f"The value {dist} is not among the allowed ones: uniform, normal, lognormal, " + f"triangle, beta, gamma" + ) + raise NotImplementedError(exception_message) # samples space needs to be from 0 to 1 mm = MinMaxScaler(feature_range=(0, 1), clip=True) From 3971b02c9cf85f294ace6ab43b7f711b9f68a51f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 14:44:35 +0200 Subject: [PATCH 15/23] add unit tests at the end of current workflows --- .github/workflows/ci-linux.yaml | 5 +++ .github/workflows/ci-mac.yaml | 5 +++ .github/workflows/ci-unit-test.yaml | 54 ----------------------------- .github/workflows/ci-windows.yaml | 5 +++ 4 files changed, 15 insertions(+), 54 deletions(-) delete mode 100644 .github/workflows/ci-unit-test.yaml diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 2f09e347b..8c99cf581 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -82,6 +82,11 @@ jobs: cp test/tmp/config.landlock_tmp.yaml config.yaml snakemake --cores all solve_all_networks --forceall + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index e766539f7..b042f8b3d 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml deleted file mode 100644 index abb291580..000000000 --- a/.github/workflows/ci-unit-test.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: CI-unit-test - -on: - push: - branches: - - main - pull_request: - branches: - - main - schedule: - - cron: "0 5 * * TUE" - -jobs: - - test-with-pypi: - - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - python-version: - - 3.9 - - "3.10" - - "3.11" - - "3.12" - os: - - ubuntu-latest - - macos-latest - - windows-latest - env: - MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 - - steps: - - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - - - name: Test with pytest - run: | - pytest test/ diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 5943cb9cb..a288b65e1 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom From c059a3ab61851a9599eab7c63c1d92ad990701d1 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 15:15:57 +0200 Subject: [PATCH 16/23] add os.sep in test_helpers.py --- test/test_helpers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/test_helpers.py b/test/test_helpers.py index 6b68b0906..8153ee622 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -268,7 +268,7 @@ def test_change_to_script_dir(): Verify the path returned by change_to_script_dir() """ change_to_script_dir(__file__) - assert str(pathlib.Path.cwd()) == path_cwd + "/test" + assert str(pathlib.Path.cwd()) == path_cwd + os.sep + "test" change_to_script_dir(".") assert str(pathlib.Path.cwd()) == path_cwd @@ -280,7 +280,7 @@ def test_get_dirname_path(): dir_name_file = get_dirname_path(__file__) dir_name_cwd = get_dirname_path(".") assert str(dir_name_file) == os.path.dirname(__file__) - assert str(dir_name_file) == path_cwd + "/test" + assert str(dir_name_file) == path_cwd + os.sep + "test" assert str(dir_name_cwd) == "." @@ -318,10 +318,6 @@ def test_get_path(): "sub_path_5", "file.nc", ) - assert ( - str(file_name_path_one) - == path_cwd + "/sub_path_1/sub_path_2/sub_path_3/sub_path_4/sub_path_5/file.nc" - ) assert str(path_name_path_two) == str( pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") ) From 52b4d4c6e637cdaba2d50a4fdaf687d53212f19c Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 12:51:54 +0200 Subject: [PATCH 17/23] from scripts --- scripts/_helpers.py | 23 ++++++----------------- scripts/add_electricity.py | 11 ++++++----- scripts/add_extra_components.py | 13 +++++++++---- scripts/augmented_line_connections.py | 13 +++++++++---- scripts/base_network.py | 10 +++++----- scripts/build_bus_regions.py | 21 ++++++++++++--------- scripts/build_cutout.py | 11 ++++++++--- scripts/build_demand_profiles.py | 12 +++++++----- scripts/build_natura_raster.py | 16 ++++++++-------- scripts/build_osm_network.py | 14 +++++++------- scripts/build_powerplants.py | 10 +++++----- scripts/build_renewable_profiles.py | 15 ++++++++------- scripts/build_shapes.py | 25 +++++++++++++------------ scripts/build_test_configs.py | 10 +++++++--- scripts/clean_osm_data.py | 7 ++++--- scripts/cluster_network.py | 27 ++++++++++++++------------- scripts/download_osm_data.py | 10 ++++++---- scripts/make_statistics.py | 12 ++++++------ scripts/make_summary.py | 9 +++++---- scripts/monte_carlo.py | 15 ++++----------- scripts/plot_network.py | 10 +++++----- scripts/plot_summary.py | 10 ++++++++-- scripts/prepare_network.py | 8 ++++---- scripts/retrieve_databundle_light.py | 10 +++++----- scripts/simplify_network.py | 22 +++++++++++----------- scripts/solve_network.py | 16 ++++++++-------- test/test_prepare_network.py | 10 ++++++---- 27 files changed, 196 insertions(+), 174 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 403a359fc..6a1ba4337 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -11,6 +11,7 @@ import shutil import subprocess import sys +import urllib import zipfile import country_converter as coco @@ -18,13 +19,18 @@ import geopandas as gpd import numpy as np import pandas as pd +import pypsa import requests import snakemake as sm import yaml +from pypsa.clustering.spatial import _make_consense from pypsa.components import component_attrs, components from pypsa.descriptors import Dict from shapely.geometry import Point from snakemake.script import Snakemake +from tqdm import tqdm + +from scripts.add_electricity import load_costs, update_transmission_costs logger = logging.getLogger(__name__) @@ -172,7 +178,6 @@ def configure_logging(snakemake, skip_handlers=False): skip_handlers : True | False (default) Do (not) skip the default handlers created for redirecting output to STDERR and file. """ - import logging kwargs = snakemake.config.get("logging", dict()).copy() kwargs.setdefault("level", "INFO") @@ -223,8 +228,6 @@ def load_network(import_name=None, custom_components=None): ------- pypsa.Network """ - import pypsa - from pypsa.descriptors import Dict override_components = None override_component_attrs_dict = None @@ -252,8 +255,6 @@ def load_network(import_name=None, custom_components=None): def load_network_for_plots( fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True ): - import pypsa - from add_electricity import load_costs, update_transmission_costs n = pypsa.Network(fn) @@ -414,9 +415,6 @@ def progress_retrieve( (default 0) Precision used to report the progress e.g. 0.1 stands for 88.1, 10 stands for 90, 80 """ - import urllib - - from tqdm import tqdm pbar = tqdm(total=100, disable=disable_progress) @@ -445,14 +443,6 @@ def get_aggregation_strategies(aggregation_strategies): the function's definition) they get lost when custom values are specified in the config. """ - import numpy as np - - # to handle the new version of PyPSA. - try: - from pypsa.clustering.spatial import _make_consense - except Exception: - # TODO: remove after new release and update minimum pypsa version - from pypsa.clustering.spatial import _make_consense bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) @@ -741,7 +731,6 @@ def create_country_list(input, iso_coding=True): full_codes_list : list Example ["NG","ZA"] """ - import logging _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 19360c9c6..99738ca1c 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -89,14 +89,16 @@ import powerplantmatching as pm import pypsa import xarray as xr -from _helpers import ( + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, + mock_snakemake, read_csv_nafix, + sets_path_to_root, update_p_nom_max, ) -from powerplantmatching.export import map_country_bus idx = pd.IndexSlice @@ -369,7 +371,7 @@ def attach_wind_and_solar( if not df.query("carrier == @tech").empty: buses = n.buses.loc[ds.indexes["bus"]] - caps = map_country_bus(df.query("carrier == @tech"), buses) + caps = pm.export.map_country_bus(df.query("carrier == @tech"), buses) caps = caps.groupby(["bus"]).p_nom.sum() caps = pd.Series(data=caps, index=ds.indexes["bus"]).fillna(0) else: @@ -811,11 +813,10 @@ def add_nice_carrier_names(n, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("add_electricity") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 94023ad89..46e865093 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -57,8 +57,14 @@ import numpy as np import pandas as pd import pypsa -from _helpers import change_to_script_dir, configure_logging, create_logger -from add_electricity import ( + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) +from scripts.add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, load_costs, @@ -265,10 +271,9 @@ def attach_hydrogen_pipelines(n, costs, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("add_extra_components", simpl="", clusters=10) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index c6f9520be..acd4af727 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -33,12 +33,18 @@ import numpy as np import pandas as pd import pypsa -from _helpers import change_to_script_dir, configure_logging, create_logger -from add_electricity import load_costs from networkx.algorithms import complement from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation from pypsa.geo import haversine_pts +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) +from scripts.add_electricity import load_costs + logger = create_logger(__name__) @@ -51,12 +57,11 @@ def haversine(p): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "augmented_line_connections", network="elec", simpl="", clusters="54" ) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/base_network.py b/scripts/base_network.py index 0f997078b..c52514cba 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -64,14 +64,16 @@ import scipy as sp import shapely.prepared import shapely.wkt -from _helpers import ( +from shapely.ops import unary_union + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_path_size, + mock_snakemake, read_csv_nafix, ) -from shapely.ops import unary_union logger = create_logger(__name__) @@ -559,11 +561,9 @@ def base_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) - snakemake = mock_snakemake("base_network") + configure_logging(snakemake) inputs = snakemake.input diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index 42f6b6b61..7326abd96 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -44,9 +44,19 @@ """ import geopandas as gpd +import numpy as np import pandas as pd import pypsa -from _helpers import REGION_COLS, change_to_script_dir, configure_logging, create_logger +from scipy.spatial import Voronoi +from shapely.geometry import Polygon + +from scripts._helpers import ( + REGION_COLS, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) logger = create_logger(__name__) @@ -66,14 +76,8 @@ def custom_voronoi_partition_pts(points, outline, add_bounds_shape=True, multipl polygons : N - ndarray[dtype=Polygon|MultiPolygon] """ - import numpy as np - from scipy.spatial import Voronoi - from shapely.geometry import Polygon - points = np.asarray(points) - polygons_arr = [] - if len(points) == 1: polygons_arr = [outline] else: @@ -147,10 +151,9 @@ def get_gadm_shape( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_bus_regions") + configure_logging(snakemake) countries = snakemake.params.countries diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 83f5e1509..297f449a9 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -97,17 +97,22 @@ import atlite import geopandas as gpd import pandas as pd -from _helpers import change_to_script_dir, configure_logging, create_logger + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) logger = create_logger(__name__) if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5") + configure_logging(snakemake) cutout_params = snakemake.params.cutouts[snakemake.wildcards.cutout] diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index ebb230903..5cfdca1db 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -49,15 +49,18 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from _helpers import ( +from shapely.prepared import prep +from shapely.validation import make_valid + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_path, + mock_snakemake, read_osm_config, + sets_path_to_root, ) -from shapely.prepared import prep -from shapely.validation import make_valid logger = create_logger(__name__) @@ -250,11 +253,10 @@ def upsample(cntry, group): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("build_demand_profiles") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index ae9fd478a..e71c3df2c 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -49,16 +49,20 @@ import atlite import geopandas as gpd import numpy as np +import pandas as pd import rasterio as rio -from _helpers import ( +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds +from shapely.ops import unary_union + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_path, is_directory_path, + mock_snakemake, ) -from rasterio.features import geometry_mask -from rasterio.warp import transform_bounds logger = create_logger(__name__) @@ -127,9 +131,6 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): ------- unified_shape : GeoDataFrame with a unified "multishape" """ - import pandas as pd - from shapely.ops import unary_union - from shapely.validation import make_valid if out_logging: logger.info("Stage 3/5: Unify protected shape area.") @@ -184,12 +185,11 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"] ) + configure_logging(snakemake) # get crs diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 25f8d7a9c..9706483f7 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -8,19 +8,21 @@ import geopandas as gpd import numpy as np import pandas as pd -from _helpers import ( +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, split +from tqdm import tqdm + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, create_logger, + mock_snakemake, read_geojson, read_osm_config, sets_path_to_root, to_csv_nafix, ) -from shapely.geometry import LineString, Point -from shapely.ops import linemerge, split -from tqdm import tqdm logger = create_logger(__name__) @@ -408,7 +410,6 @@ def connect_stations_same_station_id(lines, buses): station_id_list = buses.station_id.unique() add_lines = [] - from shapely.geometry import LineString for s_id in station_id_list: buses_station_id = buses[buses.station_id == s_id] @@ -891,10 +892,9 @@ def built_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_osm_network") + configure_logging(snakemake) # load default crs diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index b61331241..8696de4a1 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -106,19 +106,21 @@ import powerplantmatching as pm import pypsa import yaml -from _helpers import ( +from scipy.spatial import cKDTree as KDTree +from shapely.geometry import Point + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, get_path_size, + mock_snakemake, read_csv_nafix, to_csv_nafix, two_digits_2_name_country, ) -from scipy.spatial import cKDTree as KDTree -from shapely.geometry import Point logger = create_logger(__name__) @@ -298,8 +300,6 @@ def replace_natural_gas_technology(df: pd.DataFrame): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_powerplants") diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index d2d211aca..cb5740303 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -201,16 +201,18 @@ import pandas as pd import progressbar as pgb import xarray as xr -from _helpers import ( +from add_electricity import load_powerplants +from dask.distributed import Client, LocalCluster +from pypsa.geo import haversine +from shapely.geometry import LineString, Point, box + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, + mock_snakemake, sets_path_to_root, ) -from add_electricity import load_powerplants -from dask.distributed import Client, LocalCluster -from pypsa.geo import haversine -from shapely.geometry import LineString, Point, box cc = coco.CountryConverter() @@ -490,11 +492,10 @@ def create_scaling_factor( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_renewable_profiles", technology="solar") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) pgb.streams.wrap_stderr() diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 77a97b669..5ba734438 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -18,7 +18,17 @@ import rasterio import requests import xarray as xr -from _helpers import ( +from numba import njit +from numba.core import types +from numba.typed import Dict +from rasterio.mask import mask +from rasterio.windows import Window +from shapely.geometry import MultiPolygon +from shapely.ops import unary_union +from shapely.validation import make_valid +from tqdm import tqdm + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -26,21 +36,13 @@ get_current_directory_path, get_dirname_path, get_path, + mock_snakemake, path_exists, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, two_digits_2_name_country, ) -from numba import njit -from numba.core import types -from numba.typed import Dict -from rasterio.mask import mask -from rasterio.windows import Window -from shapely.geometry import MultiPolygon -from shapely.ops import unary_union -from shapely.validation import make_valid -from tqdm import tqdm sets_path_to_root("pypsa-earth") @@ -1315,11 +1317,10 @@ def gadm( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) out = snakemake.output diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index d19d86cb9..7e066e0db 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -15,9 +15,15 @@ import collections.abc import copy -from _helpers import change_to_script_dir, get_current_directory_path, get_path from ruamel.yaml import YAML +from scripts._helpers import ( + change_to_script_dir, + get_current_directory_path, + get_path, + mock_snakemake, +) + def update(d, u): for k, v in u.items(): @@ -85,8 +91,6 @@ def create_test_config(default_config, diff_config, output_path): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_test_configs") diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 9f7d3ed9a..e762e2019 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -9,12 +9,14 @@ import numpy as np import pandas as pd import reverse_geocode as rg -from _helpers import ( + +from scripts._helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, get_path_size, + mock_snakemake, save_to_geojson, to_csv_nafix, ) @@ -1062,10 +1064,9 @@ def clean_data( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("clean_osm_data") + configure_logging(snakemake) tag_substation = snakemake.params.clean_osm_data_options["tag_substation"] diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index aa6fc4fa8..b0ac236c1 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -129,24 +129,27 @@ import pandas as pd import pyomo.environ as po import pypsa -from _helpers import ( +from pypsa.clustering.spatial import ( + busmap_by_greedy_modularity, + busmap_by_hac, + busmap_by_kmeans, + get_clustering_from_busmap, +) +from scipy.sparse import csgraph +from shapely.geometry import Point + +from scripts._helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, + mock_snakemake, sets_path_to_root, update_p_nom_max, ) -from add_electricity import load_costs -from build_shapes import add_gdp_data, add_population_data -from pypsa.clustering.spatial import ( - busmap_by_greedy_modularity, - busmap_by_hac, - busmap_by_kmeans, - get_clustering_from_busmap, -) -from shapely.geometry import Point +from scripts.add_electricity import load_costs +from scripts.build_shapes import add_gdp_data, add_population_data idx = pd.IndexSlice @@ -429,7 +432,6 @@ def busmap_for_n_clusters( algorithm_kwds.setdefault("random_state", 0) def fix_country_assignment_for_hac(n): - from scipy.sparse import csgraph # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): @@ -656,13 +658,12 @@ def cluster_regions(busmaps, inputs, output): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "cluster_network", network="elec", simpl="", clusters="min" ) sets_path_to_root("pypsa-earth") + configure_logging(snakemake) inputs, outputs, config = snakemake.input, snakemake.output, snakemake.config diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index b822cc574..4ab6593ec 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -29,15 +29,18 @@ import pathlib import shutil -from _helpers import ( +from earth_osm import eo + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, + mock_snakemake, read_osm_config, + sets_path_to_root, ) -from earth_osm import eo logger = create_logger(__name__) @@ -98,11 +101,10 @@ def convert_iso_to_geofk( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("download_osm_data") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) run = snakemake.config.get("run", {}) diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index dd410d33d..bc54c1b0f 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -29,17 +29,20 @@ import pandas as pd import pypsa import xarray as xr -from _helpers import ( +from shapely.validation import make_valid + +from scripts._helpers import ( change_to_script_dir, + create_country_list, create_logger, get_path_size, is_file_path, mock_snakemake, sets_path_to_root, + three_2_two_digits_country, to_csv_nafix, ) -from build_test_configs import create_test_config -from shapely.validation import make_valid +from scripts.build_test_configs import create_test_config logger = create_logger(__name__) @@ -77,7 +80,6 @@ def generate_scenario_by_country( out_dir : str (optional) Output directory where output configuration files are executed """ - from _helpers import create_country_list, three_2_two_digits_country clean_country_list = create_country_list(country_list) @@ -585,8 +587,6 @@ def calculate_stats( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("make_statistics") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 7bc3aa86c..fb2d4cf45 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -54,14 +54,17 @@ import pandas as pd import pypsa -from _helpers import ( + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, + create_logger, get_path, + mock_snakemake, path_exists, ) -from add_electricity import create_logger, load_costs, update_transmission_costs +from scripts.add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -539,8 +542,6 @@ def to_csv(dfs, dir): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "make_summary", diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 3eadb00ca..fcbc9c21d 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -73,11 +73,12 @@ import pandas as pd import pypsa import seaborn as sns -from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang -from sklearn.preprocessing import MinMaxScaler -from solve_network import * +from sklearn.preprocessing import MinMaxScaler, minmax_scale + +from scripts._helpers import change_to_script_dir, configure_logging, create_logger +from scripts.solve_network import * logger = create_logger(__name__) sns.set(style="whitegrid") @@ -99,8 +100,6 @@ def monte_carlo_sampling_pydoe2( Adapted from Disspaset: https://github.com/energy-modelling-toolkit/Dispa-SET/blob/master/scripts/build_and_run_hypercube.py Documentation on PyDOE2: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) """ - from pyDOE2 import lhs - from scipy.stats import qmc # Generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: lh = lhs( @@ -134,7 +133,6 @@ def monte_carlo_sampling_chaospy( Documentation on Chaospy: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples """ - from scipy.stats import qmc # generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: N_FEATURES = "chaospy.Uniform(0, 1), " * N_FEATURES @@ -176,7 +174,6 @@ def monte_carlo_sampling_scipy( Documentation for Latin Hypercube: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html#scipy.stats.qmc.LatinHypercube Orthogonal LHS is better than basic LHS: https://github.com/scipy/scipy/pull/14546/files, https://en.wikipedia.org/wiki/Latin_hypercube_sampling """ - from scipy.stats import qmc sampler = qmc.LatinHypercube( d=N_FEATURES, @@ -230,8 +227,6 @@ def rescale_distribution( - The function supports rescaling for uniform, normal, lognormal, triangle, beta, and gamma distributions. - The rescaled samples will have values in the range [0, 1]. """ - from scipy.stats import beta, gamma, lognorm, norm, qmc, triang - from sklearn.preprocessing import MinMaxScaler, minmax_scale for idx, value in enumerate(uncertainties_values): dist = value.get("type") @@ -349,8 +344,6 @@ def validate_parameters( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "monte_carlo", diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 124c6c891..354dd50fd 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -23,16 +23,18 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from _helpers import ( +from matplotlib.legend_handler import HandlerPatch +from matplotlib.patches import Circle, Ellipse + +from scripts._helpers import ( aggregate_costs, aggregate_p, change_to_script_dir, configure_logging, create_logger, load_network_for_plots, + mock_snakemake, ) -from matplotlib.legend_handler import HandlerPatch -from matplotlib.patches import Circle, Ellipse to_rgba = mpl.colors.colorConverter.to_rgba @@ -357,8 +359,6 @@ def split_costs(n): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_network", diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index f6f126a47..96ee48372 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -19,7 +19,14 @@ import matplotlib.pyplot as plt import pandas as pd -from _helpers import change_to_script_dir, configure_logging, create_logger, get_path + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, +) logger = create_logger(__name__) @@ -216,7 +223,6 @@ def plot_energy(infn, snmk, fn=None): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake change_to_script_dir(__file__) snakemake = mock_snakemake( diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 299d69280..cbf785804 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -65,14 +65,16 @@ import pandas as pd import pypsa import requests -from _helpers import ( + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, + mock_snakemake, ) -from add_electricity import load_costs, update_transmission_costs +from scripts.add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -324,8 +326,6 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "prepare_network", diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index e3b0c191b..fa7741162 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -88,7 +88,10 @@ import geopandas as gpd import pandas as pd import yaml -from _helpers import ( +from google_drive_downloader import GoogleDriveDownloader as gdd +from tqdm import tqdm + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_country_list, @@ -97,11 +100,10 @@ get_current_directory_path, get_path, get_relative_path, + mock_snakemake, progress_retrieve, sets_path_to_root, ) -from google_drive_downloader import GoogleDriveDownloader as gdd -from tqdm import tqdm logger = create_logger(__name__) @@ -815,8 +817,6 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): change_to_script_dir(__file__) - from _helpers import mock_snakemake - snakemake = mock_snakemake("retrieve_databundle_light") # TODO Make logging compatible with progressbar (see PR #102, PyPSA-Eur) configure_logging(snakemake) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 48f18c4a9..263b98099 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -92,15 +92,6 @@ import pandas as pd import pypsa import scipy as sp -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - get_aggregation_strategies, - update_p_nom_max, -) -from add_electricity import load_costs -from cluster_network import cluster_regions, clustering_for_n_clusters from pypsa.clustering.spatial import ( aggregategenerators, aggregateoneport, @@ -110,6 +101,17 @@ from pypsa.io import import_components_from_dataframe, import_series_from_dataframe from scipy.sparse.csgraph import connected_components, dijkstra +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_aggregation_strategies, + mock_snakemake, + update_p_nom_max, +) +from scripts.add_electricity import load_costs +from scripts.cluster_network import cluster_regions, clustering_for_n_clusters + sys.settrace logger = create_logger(__name__) @@ -961,8 +963,6 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") configure_logging(snakemake) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 6f7dfdcdb..555bee935 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -82,12 +82,6 @@ import numpy as np import pandas as pd import pypsa -from _helpers import ( - build_directory, - change_to_script_dir, - configure_logging, - create_logger, -) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -99,6 +93,14 @@ network_lopf, ) +from scripts._helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) + logger = create_logger(__name__) @@ -545,8 +547,6 @@ def solve_network(n, config, opts="", **kwargs): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "solve_network", diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 3181b444e..9bad7c220 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,7 +5,9 @@ # -*- coding: utf-8 -*- -# from scripts.prepare_network import download_emission_data -# def test_download_emission_data(): -# filename = download_emission_data() -# assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" +from scripts.prepare_network import download_emission_data + + +def test_download_emission_data(): + filename = download_emission_data() + assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" From bae5586a3186955f7788a8ab81b26e3f3a7a70a1 Mon Sep 17 00:00:00 2001 From: Davide Fioriti <67809479+davide-f@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:33:19 +0200 Subject: [PATCH 18/23] Update configs - Anton (#1037) --- config.default.yaml | 7 +++++++ config.tutorial.yaml | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/config.default.yaml b/config.default.yaml index 2a09a18eb..91ae2c53f 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -481,13 +481,20 @@ plotting: "electricity": "#f9d002" "lines": "#70af1d" "transmission lines": "#70af1d" + "AC": "#70af1d" "AC-AC": "#70af1d" "AC line": "#70af1d" "links": "#8a1caf" "HVDC links": "#8a1caf" + "DC": "#8a1caf" "DC-DC": "#8a1caf" "DC link": "#8a1caf" "load": "#ff0000" + "load shedding": "#ff0000" + "battery discharger": slategray + "battery charger": slategray + "h2 fuel cell": '#c251ae' + "h2 electrolysis": '#ff29d9' "csp": "#fdd404" nice_names: OCGT: "Open-Cycle Gas" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 7919ed175..e6be7cf5b 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -467,13 +467,20 @@ plotting: "electricity": "#f9d002" "lines": "#70af1d" "transmission lines": "#70af1d" + "AC": "#70af1d" "AC-AC": "#70af1d" "AC line": "#70af1d" "links": "#8a1caf" "HVDC links": "#8a1caf" + "DC": "#8a1caf" "DC-DC": "#8a1caf" "DC link": "#8a1caf" "load": "#ff0000" + "load shedding": "#ff0000" + "battery discharger": slategray + "battery charger": slategray + "h2 fuel cell": '#c251ae' + "h2 electrolysis": '#ff29d9' "csp": "#fdd404" nice_names: OCGT: "Open-Cycle Gas" From d8430b05e5ed7e1d90b767fad65c8ea98313f630 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 13:35:56 +0200 Subject: [PATCH 19/23] move load_network_for_plots to plot_network --- scripts/_helpers.py | 36 ------------------------------------ scripts/plot_network.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 6a1ba4337..5544be8a3 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -30,8 +30,6 @@ from snakemake.script import Snakemake from tqdm import tqdm -from scripts.add_electricity import load_costs, update_transmission_costs - logger = logging.getLogger(__name__) # list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) @@ -252,40 +250,6 @@ def load_network(import_name=None, custom_components=None): ) -def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True -): - - n = pypsa.Network(fn) - - n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" - n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) - - n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) - ) - n.lines["carrier"] = "AC line" - n.transformers["carrier"] = "AC transformer" - - n.lines["s_nom"] = n.lines["s_nom_min"] - n.links["p_nom"] = n.links["p_nom_min"] - - if combine_hydro_ps: - n.storage_units.loc[ - n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" - ] = "hydro+PHS" - - # if the carrier was not set on the heat storage units - # bus_carrier = n.storage_units.bus.map(n.buses.carrier) - # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" - - Nyears = n.snapshot_weightings.objective.sum() / 8760.0 - costs = load_costs(tech_costs, cost_config, elec_config, Nyears) - update_transmission_costs(n, costs) - - return n - - def update_p_nom_max(n): """ If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. existing diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 354dd50fd..d4f635ede 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -23,6 +23,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import pypsa from matplotlib.legend_handler import HandlerPatch from matplotlib.patches import Circle, Ellipse @@ -32,9 +33,9 @@ change_to_script_dir, configure_logging, create_logger, - load_network_for_plots, mock_snakemake, ) +from scripts.add_electricity import load_costs, update_transmission_costs to_rgba = mpl.colors.colorConverter.to_rgba @@ -357,6 +358,40 @@ def split_costs(n): ax.grid(True, axis="y", color="k", linestyle="dotted") +def load_network_for_plots( + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True +): + + n = pypsa.Network(fn) + + n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" + n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) + + n.links["carrier"] = ( + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + ) + n.lines["carrier"] = "AC line" + n.transformers["carrier"] = "AC transformer" + + n.lines["s_nom"] = n.lines["s_nom_min"] + n.links["p_nom"] = n.links["p_nom_min"] + + if combine_hydro_ps: + n.storage_units.loc[ + n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" + ] = "hydro+PHS" + + # if the carrier was not set on the heat storage units + # bus_carrier = n.storage_units.bus.map(n.buses.carrier) + # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" + + Nyears = n.snapshot_weightings.objective.sum() / 8760.0 + costs = load_costs(tech_costs, cost_config, elec_config, Nyears) + update_transmission_costs(n, costs) + + return n + + if __name__ == "__main__": if "snakemake" not in globals(): change_to_script_dir(__file__) From 451f5697aab2099e42c631319ef00a04b0684d9f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 13:57:27 +0200 Subject: [PATCH 20/23] main to oet_main in workflow files --- .github/workflows/ci-linux.yaml | 4 ++-- .github/workflows/ci-mac.yaml | 4 ++-- .github/workflows/ci-windows.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 8c99cf581..a0e0299c4 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -3,10 +3,10 @@ name: CI-linux on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index b042f8b3d..4cb98fa97 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -3,10 +3,10 @@ name: CI-mac on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index a288b65e1..14d56e903 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -3,10 +3,10 @@ name: CI-windows on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" From 7a7153784df1da7b4b9e42ce6d8510fe11d8208a Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 14:04:14 +0200 Subject: [PATCH 21/23] modify Snakefile --- Snakefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Snakefile b/Snakefile index 32c92ba5b..12484ff62 100644 --- a/Snakefile +++ b/Snakefile @@ -4,16 +4,16 @@ import sys -sys.path.append("./scripts") +# sys.path.append("./scripts") from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from _helpers import create_country_list, get_last_commit_message -from build_demand_profiles import get_load_paths_gegis -from retrieve_databundle_light import datafiles_retrivedatabundle +from scripts._helpers import create_country_list, get_last_commit_message +from scripts.build_demand_profiles import get_load_paths_gegis +from scripts.retrieve_databundle_light import datafiles_retrivedatabundle from pathlib import Path HTTP = HTTPRemoteProvider() From b5228793e0a7e358d548147d763f90be3486ceca Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 15:10:41 +0200 Subject: [PATCH 22/23] remove .scripts/ --- Snakefile | 8 ++++---- scripts/add_electricity.py | 3 +-- scripts/add_extra_components.py | 5 ++--- scripts/augmented_line_connections.py | 11 +++++------ scripts/base_network.py | 5 ++--- scripts/build_bus_regions.py | 7 +++---- scripts/build_cutout.py | 3 +-- scripts/build_demand_profiles.py | 7 +++---- scripts/build_natura_raster.py | 9 ++++----- scripts/build_osm_network.py | 9 ++++----- scripts/build_powerplants.py | 7 +++---- scripts/build_renewable_profiles.py | 11 +++++------ scripts/build_shapes.py | 21 ++++++++++----------- scripts/build_test_configs.py | 5 ++--- scripts/clean_osm_data.py | 3 +-- scripts/cluster_network.py | 23 +++++++++++------------ scripts/download_osm_data.py | 10 +++++++--- scripts/make_statistics.py | 7 +++---- scripts/make_summary.py | 5 ++--- scripts/monte_carlo.py | 5 ++--- scripts/plot_network.py | 9 ++++----- scripts/plot_summary.py | 3 +-- scripts/prepare_network.py | 5 ++--- scripts/retrieve_databundle_light.py | 7 +++---- scripts/simplify_network.py | 21 ++++++++++----------- scripts/solve_network.py | 15 +++++++-------- test/test_helpers.py | 5 ++++- test/test_prepare_network.py | 6 +++++- 28 files changed, 111 insertions(+), 124 deletions(-) diff --git a/Snakefile b/Snakefile index 12484ff62..32c92ba5b 100644 --- a/Snakefile +++ b/Snakefile @@ -4,16 +4,16 @@ import sys -# sys.path.append("./scripts") +sys.path.append("./scripts") from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from scripts._helpers import create_country_list, get_last_commit_message -from scripts.build_demand_profiles import get_load_paths_gegis -from scripts.retrieve_databundle_light import datafiles_retrivedatabundle +from _helpers import create_country_list, get_last_commit_message +from build_demand_profiles import get_load_paths_gegis +from retrieve_databundle_light import datafiles_retrivedatabundle from pathlib import Path HTTP = HTTPRemoteProvider() diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 99738ca1c..f953d853b 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -89,8 +89,7 @@ import powerplantmatching as pm import pypsa import xarray as xr - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 46e865093..69f139a79 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -57,14 +57,13 @@ import numpy as np import pandas as pd import pypsa - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) -from scripts.add_electricity import ( +from add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, load_costs, diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index acd4af727..0bddebb30 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -33,17 +33,16 @@ import numpy as np import pandas as pd import pypsa -from networkx.algorithms import complement -from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation -from pypsa.geo import haversine_pts - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) -from scripts.add_electricity import load_costs +from add_electricity import load_costs +from networkx.algorithms import complement +from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation +from pypsa.geo import haversine_pts logger = create_logger(__name__) diff --git a/scripts/base_network.py b/scripts/base_network.py index c52514cba..8c2131512 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -64,9 +64,7 @@ import scipy as sp import shapely.prepared import shapely.wkt -from shapely.ops import unary_union - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -74,6 +72,7 @@ mock_snakemake, read_csv_nafix, ) +from shapely.ops import unary_union logger = create_logger(__name__) diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index 7326abd96..9af7f1be9 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -47,16 +47,15 @@ import numpy as np import pandas as pd import pypsa -from scipy.spatial import Voronoi -from shapely.geometry import Polygon - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) +from scipy.spatial import Voronoi +from shapely.geometry import Polygon logger = create_logger(__name__) diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 297f449a9..186e52ab6 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -97,8 +97,7 @@ import atlite import geopandas as gpd import pandas as pd - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index 5cfdca1db..747c90f84 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -49,10 +49,7 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from shapely.prepared import prep -from shapely.validation import make_valid - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -61,6 +58,8 @@ read_osm_config, sets_path_to_root, ) +from shapely.prepared import prep +from shapely.validation import make_valid logger = create_logger(__name__) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index e71c3df2c..af066fd1b 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -51,11 +51,7 @@ import numpy as np import pandas as pd import rasterio as rio -from rasterio.features import geometry_mask -from rasterio.warp import transform_bounds -from shapely.ops import unary_union - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -63,6 +59,9 @@ is_directory_path, mock_snakemake, ) +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds +from shapely.ops import unary_union logger = create_logger(__name__) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 9706483f7..1ab20b70a 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -8,11 +8,7 @@ import geopandas as gpd import numpy as np import pandas as pd -from shapely.geometry import LineString, Point -from shapely.ops import linemerge, split -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -23,6 +19,9 @@ sets_path_to_root, to_csv_nafix, ) +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, split +from tqdm import tqdm logger = create_logger(__name__) diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 8696de4a1..e1f8e91b7 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -106,10 +106,7 @@ import powerplantmatching as pm import pypsa import yaml -from scipy.spatial import cKDTree as KDTree -from shapely.geometry import Point - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -121,6 +118,8 @@ to_csv_nafix, two_digits_2_name_country, ) +from scipy.spatial import cKDTree as KDTree +from shapely.geometry import Point logger = create_logger(__name__) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index cb5740303..0ec3f3e3a 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -201,18 +201,17 @@ import pandas as pd import progressbar as pgb import xarray as xr -from add_electricity import load_powerplants -from dask.distributed import Client, LocalCluster -from pypsa.geo import haversine -from shapely.geometry import LineString, Point, box - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, mock_snakemake, sets_path_to_root, ) +from add_electricity import load_powerplants +from dask.distributed import Client, LocalCluster +from pypsa.geo import haversine +from shapely.geometry import LineString, Point, box cc = coco.CountryConverter() diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 5ba734438..faea62a13 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -18,17 +18,7 @@ import rasterio import requests import xarray as xr -from numba import njit -from numba.core import types -from numba.typed import Dict -from rasterio.mask import mask -from rasterio.windows import Window -from shapely.geometry import MultiPolygon -from shapely.ops import unary_union -from shapely.validation import make_valid -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -43,6 +33,15 @@ two_2_three_digits_country, two_digits_2_name_country, ) +from numba import njit +from numba.core import types +from numba.typed import Dict +from rasterio.mask import mask +from rasterio.windows import Window +from shapely.geometry import MultiPolygon +from shapely.ops import unary_union +from shapely.validation import make_valid +from tqdm import tqdm sets_path_to_root("pypsa-earth") diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index 7e066e0db..0dec51ae0 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -15,14 +15,13 @@ import collections.abc import copy -from ruamel.yaml import YAML - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, get_current_directory_path, get_path, mock_snakemake, ) +from ruamel.yaml import YAML def update(d, u): diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index e762e2019..fc5edbb69 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -9,8 +9,7 @@ import numpy as np import pandas as pd import reverse_geocode as rg - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index b0ac236c1..de7d538fc 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -129,16 +129,7 @@ import pandas as pd import pyomo.environ as po import pypsa -from pypsa.clustering.spatial import ( - busmap_by_greedy_modularity, - busmap_by_hac, - busmap_by_kmeans, - get_clustering_from_busmap, -) -from scipy.sparse import csgraph -from shapely.geometry import Point - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, @@ -148,8 +139,16 @@ sets_path_to_root, update_p_nom_max, ) -from scripts.add_electricity import load_costs -from scripts.build_shapes import add_gdp_data, add_population_data +from add_electricity import load_costs +from build_shapes import add_gdp_data, add_population_data +from pypsa.clustering.spatial import ( + busmap_by_greedy_modularity, + busmap_by_hac, + busmap_by_kmeans, + get_clustering_from_busmap, +) +from scipy.sparse import csgraph +from shapely.geometry import Point idx = pd.IndexSlice diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index 4ab6593ec..2a1e366d2 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,12 +26,15 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files """ + +import sys + +print("sys path download_osm_data", sys.path) + import pathlib import shutil -from earth_osm import eo - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -41,6 +44,7 @@ read_osm_config, sets_path_to_root, ) +from earth_osm import eo logger = create_logger(__name__) diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index bc54c1b0f..a62499b6e 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -29,9 +29,7 @@ import pandas as pd import pypsa import xarray as xr -from shapely.validation import make_valid - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, create_country_list, create_logger, @@ -42,7 +40,8 @@ three_2_two_digits_country, to_csv_nafix, ) -from scripts.build_test_configs import create_test_config +from build_test_configs import create_test_config +from shapely.validation import make_valid logger = create_logger(__name__) diff --git a/scripts/make_summary.py b/scripts/make_summary.py index fb2d4cf45..390bcd66f 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -54,8 +54,7 @@ import pandas as pd import pypsa - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -64,7 +63,7 @@ mock_snakemake, path_exists, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index fcbc9c21d..9ba7d4948 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -73,12 +73,11 @@ import pandas as pd import pypsa import seaborn as sns +from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang from sklearn.preprocessing import MinMaxScaler, minmax_scale - -from scripts._helpers import change_to_script_dir, configure_logging, create_logger -from scripts.solve_network import * +from solve_network import * logger = create_logger(__name__) sns.set(style="whitegrid") diff --git a/scripts/plot_network.py b/scripts/plot_network.py index d4f635ede..07c0115e4 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -24,10 +24,7 @@ import numpy as np import pandas as pd import pypsa -from matplotlib.legend_handler import HandlerPatch -from matplotlib.patches import Circle, Ellipse - -from scripts._helpers import ( +from _helpers import ( aggregate_costs, aggregate_p, change_to_script_dir, @@ -35,7 +32,9 @@ create_logger, mock_snakemake, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs +from matplotlib.legend_handler import HandlerPatch +from matplotlib.patches import Circle, Ellipse to_rgba = mpl.colors.colorConverter.to_rgba diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index 96ee48372..d89ef53ec 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -19,8 +19,7 @@ import matplotlib.pyplot as plt import pandas as pd - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index cbf785804..59c34ea3a 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -65,8 +65,7 @@ import pandas as pd import pypsa import requests - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -74,7 +73,7 @@ get_path, mock_snakemake, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index fa7741162..0f369ed27 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -88,10 +88,7 @@ import geopandas as gpd import pandas as pd import yaml -from google_drive_downloader import GoogleDriveDownloader as gdd -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_country_list, @@ -104,6 +101,8 @@ progress_retrieve, sets_path_to_root, ) +from google_drive_downloader import GoogleDriveDownloader as gdd +from tqdm import tqdm logger = create_logger(__name__) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 263b98099..eae4ed1f1 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -92,6 +92,16 @@ import pandas as pd import pypsa import scipy as sp +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_aggregation_strategies, + mock_snakemake, + update_p_nom_max, +) +from add_electricity import load_costs +from cluster_network import cluster_regions, clustering_for_n_clusters from pypsa.clustering.spatial import ( aggregategenerators, aggregateoneport, @@ -101,17 +111,6 @@ from pypsa.io import import_components_from_dataframe, import_series_from_dataframe from scipy.sparse.csgraph import connected_components, dijkstra -from scripts._helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - get_aggregation_strategies, - mock_snakemake, - update_p_nom_max, -) -from scripts.add_electricity import load_costs -from scripts.cluster_network import cluster_regions, clustering_for_n_clusters - sys.settrace logger = create_logger(__name__) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 555bee935..f057b8823 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -82,6 +82,13 @@ import numpy as np import pandas as pd import pypsa +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -93,14 +100,6 @@ network_lopf, ) -from scripts._helpers import ( - build_directory, - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, -) - logger = create_logger(__name__) diff --git a/test/test_helpers.py b/test/test_helpers.py index 8153ee622..4f41c5740 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -8,6 +8,7 @@ import os import pathlib import shutil +import sys from test.conftest import ( _content_temp_file, _name_temp_file, @@ -19,7 +20,9 @@ import numpy as np import pandas as pd -from scripts._helpers import ( +sys.path.append("./scripts") + +from _helpers import ( aggregate_fuels, build_directory, change_to_script_dir, diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 9bad7c220..914089614 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,7 +5,11 @@ # -*- coding: utf-8 -*- -from scripts.prepare_network import download_emission_data +import sys + +sys.path.append("./scripts") + +from prepare_network import download_emission_data def test_download_emission_data(): From e2abe3074c3eb638f5b3ed0d3fb18ecaac2fafd9 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 19:50:40 +0200 Subject: [PATCH 23/23] remove some abstractions --- scripts/_helpers.py | 76 ++++------------------------ scripts/build_natura_raster.py | 6 +-- scripts/build_shapes.py | 16 +++--- scripts/make_statistics.py | 19 +++---- scripts/make_summary.py | 5 +- scripts/retrieve_databundle_light.py | 7 ++- test/test_helpers.py | 65 ------------------------ 7 files changed, 36 insertions(+), 158 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 5544be8a3..d07951f13 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -110,9 +110,9 @@ def read_osm_config(*args): {"Africa": {"DZ": "algeria", ...}, ...} """ if "__file__" in globals(): - base_folder = get_dirname_path(__file__) - if not path_exists(get_path(base_folder, "configs")): - base_folder = get_dirname_path(base_folder) + base_folder = pathlib.Path(__file__).parent + if not pathlib.Path(get_path(base_folder, "configs")).exists(): + base_folder = pathlib.Path(base_folder).parent else: base_folder = get_current_directory_path() osm_config_path = get_path(base_folder, "configs", REGIONS_CONFIG) @@ -144,7 +144,7 @@ def sets_path_to_root(root_directory_name, n=8): while n >= 0: n -= 1 # if repo_name is current folder name, stop and set path - if repo_name == get_basename_abs_path("."): + if repo_name == pathlib.Path(".").absolute().name: repo_path = get_current_directory_path() # current_path os.chdir(repo_path) # change dir_path to repo_path print("This is the repository path: ", repo_path) @@ -182,7 +182,7 @@ def configure_logging(snakemake, skip_handlers=False): if skip_handlers is False: fallback_path = get_path( - get_dirname_path(__file__), "..", "logs", f"{snakemake.rule}.log" + pathlib.Path(__file__).parent, "..", "logs", f"{snakemake.rule}.log" ) logfile = snakemake.log.get( "python", snakemake.log[0] if snakemake.log else fallback_path @@ -440,7 +440,7 @@ def mock_snakemake(rule_name, **wildcards): ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: - if path_exists(p): + if pathlib.Path(p).exists(): snakefile = p break workflow = sm.Workflow( @@ -464,7 +464,7 @@ def mock_snakemake(rule_name, **wildcards): def make_accessable(*ios): for io in ios: for i in range(len(io)): - io[i] = get_abs_path(io[i]) + io[i] = pathlib.Path(io[i]).absolute() make_accessable(job.input, job.output, job.log) snakemake = Snakemake( @@ -784,35 +784,6 @@ def get_last_commit_message(path): return last_commit_message -def get_dirname_path(path): - """ - It returns the directory name of the path. - """ - return pathlib.Path(path).parent - - -def get_abs_path(path): - """ - It returns the absolutized version of the path. - """ - return pathlib.Path(path).absolute() - - -def get_basename_abs_path(path): - """ - It returns the base name of a normalized and absolutized version of the - path. - """ - return pathlib.Path(path).absolute().name - - -def get_basename_path(path): - """ - It returns the base name of the path. - """ - return pathlib.Path(path).name - - def get_path(*args): """ It returns a new path string. @@ -868,24 +839,6 @@ def get_current_directory_path(): return pathlib.Path.cwd() -def is_directory_path(path): - """ - It returns True if the path points to a directory. - - False otherwise. - """ - return pathlib.Path(path).is_dir() - - -def is_file_path(path): - """ - It returns True if the path points to a file. - - False otherwise. - """ - return pathlib.Path(path).is_file() - - def get_relative_path(path, start_path="."): """ It returns a relative path to path from start_path. @@ -895,15 +848,6 @@ def get_relative_path(path, start_path="."): return pathlib.Path(path).relative_to(start_path) -def path_exists(path): - """ - It returns True if the path exists. - - False otherwise. - """ - return pathlib.Path(path).exists() - - def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): """ Create a network topology like the power transmission network. @@ -1004,7 +948,7 @@ def download_gadm(country_code, update=False, out_logging=False): gadm_filename + ".gpkg", ) # Input filepath gpkg - if not path_exists(gadm_input_file_gpkg) or update is True: + if not pathlib.Path(gadm_input_file_gpkg).exists() or update is True: if out_logging: _logger.warning( f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" @@ -1017,7 +961,7 @@ def download_gadm(country_code, update=False, out_logging=False): shutil.copyfileobj(r.raw, f) with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: - zip_ref.extractall(get_dirname_path(gadm_input_file_zip)) + zip_ref.extractall(pathlib.Path(gadm_input_file_zip).parent) return gadm_input_file_gpkg, gadm_filename @@ -1153,7 +1097,7 @@ def override_component_attrs(directory): for component, list_name in components.list_name.items(): fn = f"{directory}/{list_name}.csv" - if is_file_path(fn): + if pathlib.Path(fn).is_file(): overrides = pd.read_csv(fn, index_col=0, na_values="n/a") attrs[component] = overrides.combine_first(attrs[component]) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index af066fd1b..2cea6f681 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -45,6 +45,7 @@ The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`. """ import os +import pathlib import atlite import geopandas as gpd @@ -56,7 +57,6 @@ configure_logging, create_logger, get_path, - is_directory_path, mock_snakemake, ) from rasterio.features import geometry_mask @@ -74,9 +74,9 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)): list_fileshapes = [] for lf in list_paths: - if is_directory_path( + if pathlib.Path( lf - ): # if it is a folder, then list all shapes files contained + ).is_dir(): # if it is a folder, then list all shapes files contained # loop over all dirs and subdirs for path, subdirs, files in os.walk(lf): # loop over all files diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index faea62a13..f6b4d5874 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -24,10 +24,8 @@ configure_logging, create_logger, get_current_directory_path, - get_dirname_path, get_path, mock_snakemake, - path_exists, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, @@ -103,7 +101,7 @@ def download_GADM(country_code, update=False, out_logging=False): GADM_filename + ".gpkg", ) # Input filepath gpkg - if not path_exists(GADM_inputfile_gpkg) or update is True: + if not pathlib.Path(GADM_inputfile_gpkg).exists() or update is True: if out_logging: logger.warning( f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" @@ -325,9 +323,9 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"): The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. """ - if not path_exists(EEZ_gpkg): + if not pathlib.Path(EEZ_gpkg).exists(): raise Exception( - f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_dirname_path(EEZ_gpkg)}" + f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(EEZ_gpkg).parent}" ) geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs) @@ -489,7 +487,7 @@ def download_WorldPop_standard( get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - if not path_exists(WorldPop_inputfile) or update is True: + if not pathlib.Path(WorldPop_inputfile).exists() or update is True: if out_logging: logger.warning( f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}" @@ -587,9 +585,9 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False): ) # Input filepath nc # Check if file exists, otherwise throw exception - if not path_exists(GDP_nc): + if not pathlib.Path(GDP_nc).exists(): raise Exception( - f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_dirname_path(GDP_nc)}" + f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}" ) # open nc dataset @@ -632,7 +630,7 @@ def load_GDP( get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath tif - if update | (not path_exists(GDP_tif)): + if update | (not pathlib.Path(GDP_tif).exists()): if out_logging: logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index a62499b6e..18a2e3a23 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -24,6 +24,8 @@ This rule creates a dataframe containing in the columns the relevant statistics for the current run. """ +import pathlib + import geopandas as gpd import numpy as np import pandas as pd @@ -34,7 +36,6 @@ create_country_list, create_logger, get_path_size, - is_file_path, mock_snakemake, sets_path_to_root, three_2_two_digits_country, @@ -130,7 +131,7 @@ def collect_basic_osm_stats(path, rulename, header): """ Collect basic statistics on OSM data: number of items """ - if is_file_path(path) and get_path_size(path) > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) @@ -149,7 +150,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"): - length of the stored shapes - length of objects with tag_frequency == 0 (DC elements) """ - if is_file_path(path) and get_path_size(path) > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) obj_length = ( @@ -251,7 +252,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"): df = pd.DataFrame() - if is_file_path(fp_onshore) and is_file_path(fp_offshore): + if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file(): gdf_onshore = gpd.read_file(fp_onshore) gdf_offshore = gpd.read_file(fp_offshore) @@ -293,7 +294,7 @@ def capacity_stats(df): else: return df.groupby("carrier").p_nom.sum().astype(float) - if is_file_path(network_path): + if pathlib.Path(network_path).is_file(): n = pypsa.Network(network_path) lines_length = float((n.lines.length * n.lines.num_parallel).sum()) @@ -348,7 +349,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): """ snakemake = _mock_snakemake(rulename) - if not is_file_path(snakemake.output.africa_shape): + if not pathlib.Path(snakemake.output.africa_shape).is_file(): return pd.DataFrame() df_continent = gpd.read_file(snakemake.output.africa_shape) @@ -359,7 +360,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): .geometry.area.iloc[0] ) - if not is_file_path(snakemake.output.gadm_shapes): + if not pathlib.Path(snakemake.output.gadm_shapes).is_file(): return pd.DataFrame() df_gadm = gpd.read_file(snakemake.output.gadm_shapes) @@ -473,7 +474,7 @@ def collect_renewable_stats(rulename, technology): """ snakemake = _mock_snakemake(rulename, technology=technology) - if is_file_path(snakemake.output.profile): + if pathlib.Path(snakemake.output.profile).is_file(): res = xr.open_dataset(snakemake.output.profile) if technology == "hydro": @@ -506,7 +507,7 @@ def add_computational_stats(df, snakemake, column_name=None): comp_data = [np.nan] * 3 # total_time, mean_load and max_memory if snakemake.benchmark: - if not is_file_path(snakemake.benchmark): + if not pathlib.Path(snakemake.benchmark).is_file(): return df bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 390bcd66f..5ad6c5bfc 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -52,6 +52,8 @@ Replacing *summaries* with *plots* creates nice colored maps of the results. """ +import pathlib + import pandas as pd import pypsa from _helpers import ( @@ -61,7 +63,6 @@ create_logger, get_path, mock_snakemake, - path_exists, ) from add_electricity import load_costs, update_transmission_costs @@ -503,7 +504,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all for label, filename in networks_dict.items(): print(label, filename) - if not path_exists(filename): + if not pathlib.Path(filename).exists(): print("does not exist!!") continue diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 0f369ed27..d425a781d 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -93,7 +93,6 @@ configure_logging, create_country_list, create_logger, - get_basename_path, get_current_directory_path, get_path, get_relative_path, @@ -445,7 +444,7 @@ def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress= destination = get_relative_path(config["destination"]) url = config["urls"]["direct"] - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) unzip = config.get("unzip", False) @@ -507,7 +506,7 @@ def download_and_unzip_hydrobasins( for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) all_downloaded &= download_and_unpack( url=url, @@ -554,7 +553,7 @@ def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=Fa # remove url feature url = postdata.pop("url") - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) if hot_run: pathlib.Path(file_path).unlink(missing_ok=True) diff --git a/test/test_helpers.py b/test/test_helpers.py index 4f41c5740..4d65adea2 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -27,18 +27,12 @@ build_directory, change_to_script_dir, country_name_2_two_digits, - get_abs_path, - get_basename_abs_path, get_conv_factors, get_current_directory_path, - get_dirname_path, get_path, get_path_size, get_relative_path, - is_directory_path, - is_file_path, modify_commodity, - path_exists, safe_divide, three_2_two_digits_country, two_2_three_digits_country, @@ -257,15 +251,6 @@ def test_build_directory(get_temp_folder, tmpdir): assert full_tree_list_os == full_tree_list_pathlib -def test_get_abs_path(): - """ - Verify the path returned by get_abs_path() - """ - abs_file = get_abs_path(__file__) - assert str(abs_file) == os.path.abspath(__file__) - assert str(abs_file) == __file__ - - def test_change_to_script_dir(): """ Verify the path returned by change_to_script_dir() @@ -276,26 +261,6 @@ def test_change_to_script_dir(): assert str(pathlib.Path.cwd()) == path_cwd -def test_get_dirname_path(): - """ - Verify the path returned by get_dirname_path() - """ - dir_name_file = get_dirname_path(__file__) - dir_name_cwd = get_dirname_path(".") - assert str(dir_name_file) == os.path.dirname(__file__) - assert str(dir_name_file) == path_cwd + os.sep + "test" - assert str(dir_name_cwd) == "." - - -def test_get_basename_abs_path(): - """ - Verify the path returned by get_basename_abs_path() - """ - base_name_file = get_basename_abs_path(__file__) - assert str(base_name_file) == os.path.basename(os.path.abspath(__file__)) - assert str(base_name_file) == "test_helpers.py" - - def test_get_path(): """ Verify the path returned by get_path() @@ -344,25 +309,6 @@ def test_get_current_directory_path(): assert str(path) == os.getcwd() -def test_is_directory_path(tmpdir): - """ - Verify if is_directory_path() returns True when path points to directory. - """ - assert is_directory_path(tmpdir) - assert is_directory_path(tmpdir) == os.path.isdir(tmpdir) - assert not is_directory_path(__file__) - - -def test_is_file_path(get_temp_file, tmpdir): - """ - Verify if is_file_path() returns True when path points to file. - """ - path = get_temp_file - assert is_file_path(path) - assert is_file_path(path) == os.path.isfile(path) - assert not is_file_path(tmpdir) - - def test_get_relative_path(get_temp_file): """ Verify the relative path returned by get_relative_path() @@ -374,17 +320,6 @@ def test_get_relative_path(get_temp_file): assert str(relative_path) == os.path.relpath(path, start=get_path(path).parent) -def test_path_exists(get_temp_file): - """ - Verify if path_exists() returns True when path exists. - """ - path = get_temp_file - pathlib_path = get_path(path) - assert path_exists(path) - assert path_exists(pathlib_path) - assert path_exists(path) == os.path.exists(path) - - def test_two_2_three_digits_country(): """ Verify the conversion from two-digit to three-digit country code.