diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 2f09e347b..a0e0299c4 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -3,10 +3,10 @@ name: CI-linux on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" @@ -82,6 +82,11 @@ jobs: cp test/tmp/config.landlock_tmp.yaml config.yaml snakemake --cores all solve_all_networks --forceall + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index e766539f7..4cb98fa97 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -3,10 +3,10 @@ name: CI-mac on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 5943cb9cb..14d56e903 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -3,10 +3,10 @@ name: CI-windows on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/Snakefile b/Snakefile index ac8724ef3..32c92ba5b 100644 --- a/Snakefile +++ b/Snakefile @@ -6,7 +6,7 @@ import sys sys.path.append("./scripts") -from os.path import normpath, exists, isdir +from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider diff --git a/config.default.yaml b/config.default.yaml index 2a09a18eb..91ae2c53f 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -481,13 +481,20 @@ plotting: "electricity": "#f9d002" "lines": "#70af1d" "transmission lines": "#70af1d" + "AC": "#70af1d" "AC-AC": "#70af1d" "AC line": "#70af1d" "links": "#8a1caf" "HVDC links": "#8a1caf" + "DC": "#8a1caf" "DC-DC": "#8a1caf" "DC link": "#8a1caf" "load": "#ff0000" + "load shedding": "#ff0000" + "battery discharger": slategray + "battery charger": slategray + "h2 fuel cell": '#c251ae' + "h2 electrolysis": '#ff29d9' "csp": "#fdd404" nice_names: OCGT: "Open-Cycle Gas" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 7919ed175..e6be7cf5b 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -467,13 +467,20 @@ plotting: "electricity": "#f9d002" "lines": "#70af1d" "transmission lines": "#70af1d" + "AC": "#70af1d" "AC-AC": "#70af1d" "AC line": "#70af1d" "links": "#8a1caf" "HVDC links": "#8a1caf" + "DC": "#8a1caf" "DC-DC": "#8a1caf" "DC link": "#8a1caf" "load": "#ff0000" + "load shedding": "#ff0000" + "battery discharger": slategray + "battery charger": slategray + "h2 fuel cell": '#c251ae' + "h2 electrolysis": '#ff29d9' "csp": "#fdd404" nice_names: OCGT: "Open-Cycle Gas" diff --git a/doc/conf.py b/doc/conf.py index 
8bd5c798a..fdf42ba93 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -12,16 +12,16 @@ # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. +# documentation root, use pathlib.Path.absolute to make it absolute, like shown here. # import datetime -import os +import pathlib import shutil import sys from git import Repo -sys.path.insert(0, os.path.abspath("../scripts")) +sys.path.insert(0, str(pathlib.Path("../scripts").absolute())) for p in sys.path: print(p) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 4fdf000b9..d07951f13 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -7,14 +7,28 @@ import logging import os +import pathlib +import shutil import subprocess import sys -from pathlib import Path +import urllib +import zipfile import country_converter as coco +import fiona import geopandas as gpd +import numpy as np import pandas as pd +import pypsa +import requests +import snakemake as sm import yaml +from pypsa.clustering.spatial import _make_consense +from pypsa.components import component_attrs, components +from pypsa.descriptors import Dict +from shapely.geometry import Point +from snakemake.script import Snakemake +from tqdm import tqdm logger = logging.getLogger(__name__) @@ -34,21 +48,21 @@ def handle_exception(exc_type, exc_value, exc_traceback): tb = exc_traceback while tb.tb_next: tb = tb.tb_next - flname = tb.tb_frame.f_globals.get("__file__") - funcname = tb.tb_frame.f_code.co_name + fl_name = tb.tb_frame.f_globals.get("__file__") + func_name = tb.tb_frame.f_code.co_name if issubclass(exc_type, KeyboardInterrupt): logger.error( "Manual interruption %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, ) else: logger.error( "An error happened in module %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, exc_info=(exc_type, exc_value, exc_traceback), ) @@ -59,12 +73,12 @@ def create_logger(logger_name, level=logging.INFO): Create a logger for a module and adds a handler needed to capture in logs traceback from exceptions emerging during the workflow. 
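create_logger is the logging entry point the rule scripts in this PR rely on; a minimal usage sketch, with an illustrative message that is not taken from the diff:

from _helpers import create_logger

logger = create_logger(__name__)           # also installs handle_exception as sys.excepthook
logger.info("Stage 1/5: example message")  # emitted on stdout via the StreamHandler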
""" - logger = logging.getLogger(logger_name) - logger.setLevel(level) + logger_instance = logging.getLogger(logger_name) + logger_instance.setLevel(level) handler = logging.StreamHandler(stream=sys.stdout) - logger.addHandler(handler) + logger_instance.addHandler(handler) sys.excepthook = handle_exception - return logger + return logger_instance def read_osm_config(*args): @@ -96,12 +110,12 @@ def read_osm_config(*args): {"Africa": {"DZ": "algeria", ...}, ...} """ if "__file__" in globals(): - base_folder = os.path.dirname(__file__) - if not os.path.exists(os.path.join(base_folder, "configs")): - base_folder = os.path.dirname(base_folder) + base_folder = pathlib.Path(__file__).parent + if not pathlib.Path(get_path(base_folder, "configs")).exists(): + base_folder = pathlib.Path(base_folder).parent else: - base_folder = os.getcwd() - osm_config_path = os.path.join(base_folder, "configs", REGIONS_CONFIG) + base_folder = get_current_directory_path() + osm_config_path = get_path(base_folder, "configs", REGIONS_CONFIG) with open(osm_config_path, "r") as f: osm_config = yaml.safe_load(f) if len(args) == 0: @@ -112,7 +126,7 @@ def read_osm_config(*args): return tuple([osm_config[a] for a in args]) -def sets_path_to_root(root_directory_name): +def sets_path_to_root(root_directory_name, n=8): """ Search and sets path to the given root directory (root/path/file). @@ -123,17 +137,15 @@ def sets_path_to_root(root_directory_name): n : int Number of folders the function will check upwards/root directed. """ - import os repo_name = root_directory_name - n = 8 # check max 8 levels above. Random default. n0 = n while n >= 0: n -= 1 # if repo_name is current folder name, stop and set path - if repo_name == os.path.basename(os.path.abspath(".")): - repo_path = os.getcwd() # os.getcwd() = current_path + if repo_name == pathlib.Path(".").absolute().name: + repo_path = get_current_directory_path() # current_path os.chdir(repo_path) # change dir_path to repo_path print("This is the repository path: ", repo_path) print("Had to go %d folder(s) up." % (n0 - 1 - n)) @@ -143,8 +155,7 @@ def sets_path_to_root(root_directory_name): print("Can't find the repo path.") # if repo_name NOT current folder name, go one directory higher else: - upper_path = os.path.dirname(os.path.abspath(".")) # name of upper folder - os.chdir(upper_path) + change_to_script_dir(".") # change to the upper folder def configure_logging(snakemake, skip_handlers=False): @@ -165,14 +176,13 @@ def configure_logging(snakemake, skip_handlers=False): skip_handlers : True | False (default) Do (not) skip the default handlers created for redirecting output to STDERR and file. 
""" - import logging kwargs = snakemake.config.get("logging", dict()).copy() kwargs.setdefault("level", "INFO") if skip_handlers is False: - fallback_path = Path(__file__).parent.joinpath( - "..", "logs", f"{snakemake.rule}.log" + fallback_path = get_path( + pathlib.Path(__file__).parent, "..", "logs", f"{snakemake.rule}.log" ) logfile = snakemake.log.get( "python", snakemake.log[0] if snakemake.log else fallback_path @@ -216,80 +226,38 @@ def load_network(import_name=None, custom_components=None): ------- pypsa.Network """ - import pypsa - from pypsa.descriptors import Dict override_components = None - override_component_attrs = None + override_component_attrs_dict = None if custom_components is not None: override_components = pypsa.components.components.copy() - override_component_attrs = Dict( + override_component_attrs_dict = Dict( {k: v.copy() for k, v in pypsa.components.component_attrs.items()} ) for k, v in custom_components.items(): override_components.loc[k] = v["component"] - override_component_attrs[k] = pd.DataFrame( + override_component_attrs_dict[k] = pd.DataFrame( columns=["type", "unit", "default", "description", "status"] ) for attr, val in v["attributes"].items(): - override_component_attrs[k].loc[attr] = val + override_component_attrs_dict[k].loc[attr] = val return pypsa.Network( import_name=import_name, override_components=override_components, - override_component_attrs=override_component_attrs, + override_component_attrs=override_component_attrs_dict, ) -def pdbcast(v, h): - return pd.DataFrame( - v.values.reshape((-1, 1)) * h.values, index=v.index, columns=h.index - ) - - -def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True -): - import pypsa - from add_electricity import load_costs, update_transmission_costs - - n = pypsa.Network(fn) - - n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" - n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) - - n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) - ) - n.lines["carrier"] = "AC line" - n.transformers["carrier"] = "AC transformer" - - n.lines["s_nom"] = n.lines["s_nom_min"] - n.links["p_nom"] = n.links["p_nom_min"] - - if combine_hydro_ps: - n.storage_units.loc[ - n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" - ] = "hydro+PHS" - - # if the carrier was not set on the heat storage units - # bus_carrier = n.storage_units.bus.map(n.buses.carrier) - # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" - - Nyears = n.snapshot_weightings.objective.sum() / 8760.0 - costs = load_costs(tech_costs, cost_config, elec_config, Nyears) - update_transmission_costs(n, costs) - - return n - - def update_p_nom_max(n): - # if extendable carriers (solar/onwind/...) have capacity >= 0, - # e.g. existing assets from the OPSD project are included to the network, - # the installed capacity might exceed the expansion limit. - # Hence, we update the assumptions. + """ + If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. existing + assets from the OPSD project are included to the network, the installed + capacity might exceed the expansion limit. + Hence, we update the assumptions. 
+ """ n.generators.p_nom_max = n.generators[["p_nom_min", "p_nom_max"]].max(1) @@ -347,7 +315,7 @@ def aggregate_p_curtailed(n): def aggregate_costs(n, flatten=False, opts=None, existing_only=False): - components = dict( + components_dict = dict( Link=("p_nom", "p0"), Generator=("p_nom", "p"), StorageUnit=("p_nom", "p"), @@ -358,7 +326,8 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): costs = {} for c, (p_nom, p_attr) in zip( - n.iterate_components(components.keys(), skip_empty=False), components.values() + n.iterate_components(components_dict.keys(), skip_empty=False), + components_dict.values(), ): if c.df.empty: continue @@ -390,10 +359,10 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): def progress_retrieve( - url, file, data=None, headers=None, disable_progress=False, roundto=1.0 + url, file, data=None, headers=None, disable_progress=False, round_to_value=1.0 ): """ - Function to download data from a url with a progress bar progress in + Function to download data from an url with a progress bar progress in retrieving data. Parameters @@ -406,18 +375,18 @@ def progress_retrieve( Data for the request (default None), when not none Post method is used disable_progress : bool When true, no progress bar is shown - roundto : float + round_to_value : float (default 0) Precision used to report the progress e.g. 0.1 stands for 88.1, 10 stands for 90, 80 """ - import urllib - - from tqdm import tqdm pbar = tqdm(total=100, disable=disable_progress) - def dlProgress(count, blockSize, totalSize, roundto=roundto): - pbar.n = round(count * blockSize * 100 / totalSize / roundto) * roundto + def dl_progress(count, block_size, total_size): + pbar.n = ( + round(count * block_size * 100 / total_size / round_to_value) + * round_to_value + ) pbar.refresh() if data is not None: @@ -428,7 +397,7 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto): opener.addheaders = headers urllib.request.install_opener(opener) - urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data) + urllib.request.urlretrieve(url, file, reporthook=dl_progress, data=data) def get_aggregation_strategies(aggregation_strategies): @@ -438,14 +407,6 @@ def get_aggregation_strategies(aggregation_strategies): the function's definition) they get lost when custom values are specified in the config. """ - import numpy as np - - # to handle the new version of PyPSA. - try: - from pypsa.clustering.spatial import _make_consense - except Exception: - # TODO: remove after new release and update minimum pypsa version - from pypsa.clustering.spatial import _make_consense bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) @@ -456,7 +417,7 @@ def get_aggregation_strategies(aggregation_strategies): return bus_strategies, generator_strategies -def mock_snakemake(rulename, **wildcards): +def mock_snakemake(rule_name, **wildcards): """ This function is expected to be executed from the "scripts"-directory of " the snakemake project. It returns a snakemake.script.Snakemake object, @@ -466,25 +427,20 @@ def mock_snakemake(rulename, **wildcards): Parameters ---------- - rulename: str + rule_name: str name of the rule for which the snakemake object should be generated wildcards: keyword arguments fixing the wildcards. Only necessary if wildcards are needed. 
""" - import os - - import snakemake as sm - from pypsa.descriptors import Dict - from snakemake.script import Snakemake - script_dir = Path(__file__).parent.resolve() + script_dir = pathlib.Path(__file__).parent.resolve() assert ( - Path.cwd().resolve() == script_dir + pathlib.Path.cwd().resolve() == script_dir ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: - if os.path.exists(p): + if pathlib.Path(p).exists(): snakefile = p break workflow = sm.Workflow( @@ -493,12 +449,12 @@ def mock_snakemake(rulename, **wildcards): workflow.include(snakefile) workflow.global_resources = {} try: - rule = workflow.get_rule(rulename) + rule = workflow.get_rule(rule_name) except Exception as exception: print( exception, - f"The {rulename} might be a conditional rule in the Snakefile.\n" - f"Did you enable {rulename} in the config?", + f"The {rule_name} might be a conditional rule in the Snakefile.\n" + f"Did you enable {rule_name} in the config?", ) raise dag = sm.dag.DAG(workflow, rules=[rule]) @@ -508,7 +464,7 @@ def mock_snakemake(rulename, **wildcards): def make_accessable(*ios): for io in ios: for i in range(len(io)): - io[i] = os.path.abspath(io[i]) + io[i] = pathlib.Path(io[i]).absolute() make_accessable(job.input, job.output, job.log) snakemake = Snakemake( @@ -527,7 +483,7 @@ def make_accessable(*ios): # create log and output dir if not existent for path in list(snakemake.log) + list(snakemake.output): - Path(path).parent.mkdir(parents=True, exist_ok=True) + build_directory(path) os.chdir(script_dir) return snakemake @@ -575,7 +531,9 @@ def three_2_two_digits_country(three_code_country): return two_code_country -def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_words=[]): +def two_digits_2_name_country( + two_code_country, name_string="name_short", no_comma=False, remove_start_words=[] +): """ Convert 2-digit country code to full name country: @@ -583,7 +541,10 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word ---------- two_code_country: str 2-digit country name - nocomma: bool (optional, default False) + name_string: str (optional, default name_short) + When name_short CD -> DR Congo + When name_official CD -> Democratic Republic of the Congo + no_comma: bool (optional, default False) When true, country names with comma are extended to remove the comma. 
Example CD -> Congo, The Democratic Republic of -> The Democratic Republic of Congo remove_start_words: list (optional, default empty) @@ -595,13 +556,15 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word full_name: str full country name """ + if remove_start_words is None: + remove_start_words = list() if two_code_country == "SN-GM": return f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" - full_name = coco.convert(two_code_country, to="name_short") + full_name = coco.convert(two_code_country, to=name_string) - if nocomma: - # separate list by delim + if no_comma: + # separate list by delimiter splits = full_name.split(", ") # reverse the order @@ -610,7 +573,7 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word # return the merged string full_name = " ".join(splits) - # when list is non empty + # when list is non-empty if remove_start_words: # loop over every provided word for word in remove_start_words: @@ -652,7 +615,7 @@ def read_csv_nafix(file, **kwargs): if "na_values" not in kwargs: kwargs["na_values"] = NA_VALUES - if os.stat(file).st_size > 0: + if get_path_size(file) > 0: return pd.read_csv(file, **kwargs) else: return pd.DataFrame() @@ -670,8 +633,7 @@ def to_csv_nafix(df, path, **kwargs): def save_to_geojson(df, fn): - if os.path.exists(fn): - os.unlink(fn) # remove file if it exists + pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists # save file if the (Geo)DataFrame is non-empty if df.empty: @@ -701,7 +663,7 @@ def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): CRS of the GeoDataFrame """ # if the file is non-zero, read the geodataframe and return it - if os.path.getsize(fn) > 0: + if get_path_size(fn) > 0: return gpd.read_file(fn) else: # else return an empty GeoDataFrame @@ -733,7 +695,6 @@ def create_country_list(input, iso_coding=True): full_codes_list : list Example ["NG","ZA"] """ - import logging _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) @@ -805,7 +766,7 @@ def get_last_commit_message(path): """ _logger = logging.getLogger(__name__) last_commit_message = None - backup_cwd = os.getcwd() + backup_cwd = get_current_directory_path() try: os.chdir(path) last_commit_message = ( @@ -821,3 +782,478 @@ def get_last_commit_message(path): os.chdir(backup_cwd) return last_commit_message + + +def get_path(*args): + """ + It returns a new path string. + """ + return pathlib.Path(*args) + + +def get_path_size(path): + """ + It returns the size of a path (in bytes) + """ + return pathlib.Path(path).stat().st_size + + +def build_directory(path, just_parent_directory=True): + """ + It creates recursively the directory and its leaf directories. + + Parameters: + path (str): The path to the file + just_parent_directory (Boolean): given a path dir/subdir + True: it creates just the parent directory dir + False: it creates the full directory tree dir/subdir + """ + + # Check if the provided path points to a directory + if just_parent_directory: + pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True) + else: + pathlib.Path(path).mkdir(parents=True, exist_ok=True) + + +def change_to_script_dir(path): + """ + Change the current working directory to the directory containing the given + script. + + Parameters: + path (str): The path to the file. 
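A short sketch of how the new path helpers introduced here compose; the file names are placeholders, not paths used by the workflow:

from _helpers import build_directory, get_path, get_path_size

logfile = get_path("logs", "solve_network", "elec_s_10.log")  # -> pathlib.Path
build_directory(logfile)                                      # creates logs/solve_network/ only
build_directory("resources/osm/raw", just_parent_directory=False)  # creates the full tree
n_bytes = get_path_size("config.yaml")                        # st_size of an existing file, in bytes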
+ """ + + # Get the absolutized and normalized path of directory containing the file + directory_path = pathlib.Path(path).absolute().parent + + # Change the current working directory to the script directory + os.chdir(directory_path) + + +def get_current_directory_path(): + """ + It returns the current directory path. + """ + return pathlib.Path.cwd() + + +def get_relative_path(path, start_path="."): + """ + It returns a relative path to path from start_path. + + Default for start_path is the current directory + """ + return pathlib.Path(path).relative_to(start_path) + + +def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): + """ + Create a network topology like the power transmission network. + + Parameters + ---------- + n : pypsa.Network + prefix : str + connector : str + bidirectional : bool, default True + True: one link for each connection + False: one link for each connection and direction (back and forth) + + Returns + ------- + pd.DataFrame with columns bus0, bus1 and length + """ + + ln_attrs = ["bus0", "bus1", "length"] + lk_attrs = ["bus0", "bus1", "length", "underwater_fraction"] + + # TODO: temporary fix for when underwater_fraction is not found + if "underwater_fraction" not in n.links.columns: + if n.links.empty: + n.links["underwater_fraction"] = None + else: + n.links["underwater_fraction"] = 0.0 + + candidates = pd.concat( + [n.lines[ln_attrs], n.links.loc[n.links.carrier == "DC", lk_attrs]] + ).fillna(0) + + positive_order = candidates.bus0 < candidates.bus1 + candidates_p = candidates[positive_order] + swap_buses = {"bus0": "bus1", "bus1": "bus0"} + candidates_n = candidates[~positive_order].rename(columns=swap_buses) + candidates = pd.concat([candidates_p, candidates_n]) + + def make_index(c): + return prefix + c.bus0 + connector + c.bus1 + + topo = candidates.groupby(["bus0", "bus1"], as_index=False).mean() + topo.index = topo.apply(make_index, axis=1) + + if not bidirectional: + topo_reverse = topo.copy() + topo_reverse.rename(columns=swap_buses, inplace=True) + topo_reverse.index = topo_reverse.apply(make_index, axis=1) + topo = pd.concat([topo, topo_reverse]) + + return topo + + +def cycling_shift(df, steps=1): + """ + Cyclic shift on index of pd.Series|pd.DataFrame by number of steps. + """ + df = df.copy() + new_index = np.roll(df.index, steps) + df.values[:] = df.reindex(index=new_index).values + return df + + +def download_gadm(country_code, update=False, out_logging=False): + """ + Download gpkg file from GADM for a given country code. 
+ + Parameters + ---------- + country_code : str + Two letter country codes of the downloaded files + update : bool + Update = true, forces re-download of files + + Returns + ------- + gpkg file per country + """ + + gadm_filename = f"gadm36_{two_2_three_digits_country(country_code)}" + gadm_url = f"https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/{gadm_filename}_gpkg.zip" + _logger = logging.getLogger(__name__) + gadm_input_file_zip = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".zip", + ) # Input filepath zip + + gadm_input_file_gpkg = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".gpkg", + ) # Input filepath gpkg + + if not pathlib.Path(gadm_input_file_gpkg).exists() or update is True: + if out_logging: + _logger.warning( + f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" + ) + # create data/osm directory + build_directory(gadm_input_file_zip) + + with requests.get(gadm_url, stream=True) as r: + with open(gadm_input_file_zip, "wb") as f: + shutil.copyfileobj(r.raw, f) + + with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: + zip_ref.extractall(pathlib.Path(gadm_input_file_zip).parent) + + return gadm_input_file_gpkg, gadm_filename + + +def get_gadm_layer(country_list, layer_id, update=False, outlogging=False): + """ + Function to retrieve a specific layer id of a geopackage for a selection of + countries. + + Parameters + ---------- + country_list : str + List of the countries + layer_id : int + Layer to consider in the format GID_{layer_id}. + When the requested layer_id is greater than the last available layer, then the last layer is selected. + When a negative value is requested, then, the last layer is requested + """ + # initialization of the list of geodataframes + geodf_list = [] + + for country_code in country_list: + # download file gpkg + file_gpkg, name_file = download_gadm(country_code, update, outlogging) + + # get layers of a geopackage + list_layers = fiona.listlayers(file_gpkg) + + # get layer name + if layer_id < 0 | layer_id >= len(list_layers): + # when layer id is negative or larger than the number of layers, select the last layer + layer_id = len(list_layers) - 1 + code_layer = np.mod(layer_id, len(list_layers)) + layer_name = ( + f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}" + ) + + # read gpkg file + geodf_temp = gpd.read_file(file_gpkg, layer=layer_name) + + # convert country name representation of the main country (GID_0 column) + geodf_temp["GID_0"] = [ + three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] + ] + + # create a subindex column that is useful + # in the GADM processing of sub-national zones + geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"] + + # concatenate geodataframes + geodf_list = pd.concat([geodf_list, geodf_temp]) + + geodf_gadm = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) + geodf_gadm.set_crs(geodf_list[0].crs, inplace=True) + + return geodf_gadm + + +def locate_bus( + coords, + co, + gadm_level, + path_to_gadm=None, + gadm_clustering=False, +): + """ + Function to locate the right node for a coordinate set input coords of + point. 
+ + Parameters + ---------- + coords: pandas dataseries + dataseries with 2 rows x & y representing the longitude and latitude + co: string (code for country where coords are MA Morocco) + code of the countries where the coordinates are + """ + col = "name" + if not gadm_clustering: + gdf = gpd.read_file(path_to_gadm) + else: + if path_to_gadm: + gdf = gpd.read_file(path_to_gadm) + if "GADM_ID" in gdf.columns: + col = "GADM_ID" + + if gdf[col][0][ + :3 + ].isalpha(): # TODO clean later by changing all codes to 2 letters + gdf[col] = gdf[col].apply( + lambda name: three_2_two_digits_country(name[:3]) + name[3:] + ) + else: + gdf = get_gadm_layer(co, gadm_level) + col = "GID_{}".format(gadm_level) + + # gdf.set_index("GADM_ID", inplace=True) + gdf_co = gdf[ + gdf[col].str.contains(co) + ] # geodataframe of entire continent - output of prev function {} are placeholders + # in strings - conditional formatting + # insert any variable into that place using .format - extract string and filter for those containing co (MA) + point = Point(coords["x"], coords["y"]) # point object + + try: + return gdf_co[gdf_co.contains(point)][ + col + ].item() # filter gdf_co which contains point and returns the bus + + except ValueError: + return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][ + col + ].item() # looks for closest one shape=node + + +def override_component_attrs(directory): + """Tell PyPSA that links can have multiple outputs by + overriding the component_attrs. This can be done for + as many buses as you need with format busi for i = 2,3,4,5,.... + See https://pypsa.org/doc/components.html#link-with-multiple-outputs-or-inputs + + Parameters + ---------- + directory : string + Folder where component attributes to override are stored + analogous to ``pypsa/component_attrs``, e.g. `links.csv`. + + Returns + ------- + Dictionary of overridden component attributes. + """ + + attrs = Dict({k: v.copy() for k, v in component_attrs.items()}) + + for component, list_name in components.list_name.items(): + fn = f"{directory}/{list_name}.csv" + if pathlib.Path(fn).is_file(): + overrides = pd.read_csv(fn, index_col=0, na_values="n/a") + attrs[component] = overrides.combine_first(attrs[component]) + + return attrs + + +def get_conv_factors(sector): + """ + Create a dictionary with all the conversion factors for the standard net calorific value + from Tera Joule per Kilo Metric-ton to Tera Watt-hour based on + https://unstats.un.org/unsd/energy/balance/2014/05.pdf. + + Considering that 1 Watt-hour = 3600 Joule, one obtains the values below dividing + the standard net calorific values from the pdf by 3600. + + For example, the value "hard coal": 0.007167 is given by 25.8 / 3600, where 25.8 is the standard + net calorific value. 
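A one-line arithmetic check of the example given above, using the values stated in the docstring:

hard_coal_ncv = 25.8   # standard net calorific value, TJ per kilo metric-ton
tj_to_twh = 1 / 3600   # 1 Wh = 3600 J, hence 1 TJ = 1/3600 TWh
print(round(hard_coal_ncv * tj_to_twh, 6))   # 0.007167, the "hard coal" entry below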
+ """ + + conversion_factors_dict = { + "additives and oxygenates": 0.008333, + "anthracite": 0.005, + "aviation gasoline": 0.01230, + "bagasse": 0.002144, + "biodiesel": 0.01022, + "biogasoline": 0.007444, + "bio jet kerosene": 0.011111, + "bitumen": 0.01117, + "brown coal": 0.003889, + "brown coal briquettes": 0.00575, + "charcoal": 0.00819, + "coal tar": 0.007778, + "coke-oven coke": 0.0078334, + "coke-oven gas": 0.000277, + "coking coal": 0.007833, + "conventional crude oil": 0.01175, + "crude petroleum": 0.011750, + "ethane": 0.01289, + "fuel oil": 0.01122, + "fuelwood": 0.00254, + "gas coke": 0.007326, + "gas oil/ diesel oil": 0.01194, + "gasoline-type jet fuel": 0.01230, + "hard coal": 0.007167, + "kerosene-type jet fuel": 0.01225, + "lignite": 0.003889, + "liquefied petroleum gas (lpg)": 0.01313, + "lubricants": 0.011166, + "motor gasoline": 0.01230, + "naphtha": 0.01236, + "natural gas": 0.00025, + "natural gas liquids": 0.01228, + "oil shale": 0.00247, + "other bituminous coal": 0.005556, + "paraffin waxes": 0.01117, + "patent fuel": 0.00575, + "peat": 0.00271, + "peat products": 0.00271, + "petroleum coke": 0.009028, + "refinery gas": 0.01375, + "sub-bituminous coal": 0.005555, + } + + if sector == "industry": + return conversion_factors_dict + else: + logger.info(f"No conversion factors available for sector {sector}") + return np.nan + + +def aggregate_fuels(sector): + gas_fuels = [ + "blast furnace gas", + "natural gas (including lng)", + "natural gas liquids", + ] + + oil_fuels = [ + "bitumen", + "conventional crude oil", + "crude petroleum", + "ethane", + "fuel oil", + "gas oil/ diesel oil", + "kerosene-type jet fuel", + "liquefied petroleum gas (lpg)", + "lubricants", + "motor gasoline", + "naphtha", + "patent fuel", + "petroleum coke", + "refinery gas", + ] + + coal_fuels = [ + "anthracite", + "brown coal", + "brown coal briquettes", + "coke-oven coke", + "coke-oven gas", + "coking coal", + "gas coke", + "gasworks gas", + "hard coal", + "lignite", + "other bituminous coal", + "peat", + "peat products", + "sub-bituminous coal", + ] + + biomass_fuels = [ + "bagasse", + "fuelwood", + "biogases", + "biogasoline", + "biodiesel", + "charcoal", + "black liquor", + ] + + electricity = ["electricity"] + + heat = ["heat", "direct use of geothermal heat", "direct use of solar thermal heat"] + + if sector == "industry": + return gas_fuels, oil_fuels, biomass_fuels, coal_fuels, heat, electricity + else: + logger.info(f"No fuels available for sector {sector}") + return np.nan + + +def modify_commodity(commodity): + if commodity.strip() == "Hrad coal": + commodity = "Hard coal" + elif commodity.strip().casefold() == "coke oven gas": + commodity = "Coke-oven gas" + elif commodity.strip().casefold() == "coke oven coke": + commodity = "Coke-oven coke" + elif commodity.strip() == "Liquified Petroleum Gas (LPG)": + commodity = "Liquefied Petroleum Gas (LPG)" + elif commodity.strip() == "Gas Oil/Diesel Oil": + commodity = "Gas Oil/ Diesel Oil" + elif commodity.strip() == "Lignite brown coal- recoverable resources": + commodity = "Lignite brown coal - recoverable resources" + return commodity.strip().casefold() + + +def safe_divide(numerator, denominator): + """ + Safe division function that returns NaN when the denominator is zero. + """ + if denominator != 0.0: + return numerator / denominator + else: + logging.warning( + f"Division by zero: {numerator} / {denominator}, returning NaN." 
+ ) + return np.nan diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 0911588f7..f953d853b 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -84,15 +84,20 @@ - additional open- and combined-cycle gas turbines (if ``OCGT`` and/or ``CCGT`` is listed in the config setting ``electricity: extendable_carriers``) """ -import os - import numpy as np import pandas as pd import powerplantmatching as pm import pypsa import xarray as xr -from _helpers import configure_logging, create_logger, read_csv_nafix, update_p_nom_max -from powerplantmatching.export import map_country_bus +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, + read_csv_nafix, + sets_path_to_root, + update_p_nom_max, +) idx = pd.IndexSlice @@ -365,7 +370,7 @@ def attach_wind_and_solar( if not df.query("carrier == @tech").empty: buses = n.buses.loc[ds.indexes["bus"]] - caps = map_country_bus(df.query("carrier == @tech"), buses) + caps = pm.export.map_country_bus(df.query("carrier == @tech"), buses) caps = caps.groupby(["bus"]).p_nom.sum() caps = pd.Series(data=caps, index=ds.indexes["bus"]).fillna(0) else: @@ -807,11 +812,10 @@ def add_nice_carrier_names(n, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("add_electricity") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 29c57e60c..69f139a79 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -52,12 +52,17 @@ - ``Stores`` of carrier 'H2' and/or 'battery' in combination with ``Links``. If this option is chosen, the script adds extra buses with corresponding carrier where energy ``Stores`` are attached and which are connected to the corresponding power buses via two links, one each for charging and discharging. This leads to three investment variables for the energy capacity, charging and discharging capacity of the storage unit. 
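A minimal PyPSA sketch of the bus-store-two-links pattern this docstring describes; the component names, efficiencies and snapshots are illustrative, not the script's actual values:

import pypsa

n = pypsa.Network()
n.set_snapshots(range(4))
n.add("Bus", "elec")
n.add("Bus", "battery bus", carrier="battery")
n.add("Store", "battery", bus="battery bus", e_nom_extendable=True, e_cyclic=True)
n.add("Link", "battery charger", bus0="elec", bus1="battery bus",
      efficiency=0.9, p_nom_extendable=True)
n.add("Link", "battery discharger", bus0="battery bus", bus1="elec",
      efficiency=0.9, p_nom_extendable=True)
# Three investment variables: Store.e_nom plus p_nom of the charger and discharger links.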
""" -import os + import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) from add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, @@ -265,10 +270,9 @@ def attach_hydrogen_pipelines(n, costs, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("add_extra_components", simpl="", clusters=10) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index 3b0072457..0bddebb30 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -28,13 +28,17 @@ Description ----------- """ -import os import networkx as nx import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) from add_electricity import load_costs from networkx.algorithms import complement from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation @@ -52,12 +56,11 @@ def haversine(p): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "augmented_line_connections", network="elec", simpl="", clusters="54" ) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/base_network.py b/scripts/base_network.py index 04e0c388d..8c2131512 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -55,7 +55,6 @@ Description ----------- """ -import os import geopandas as gpd import networkx as nx @@ -65,7 +64,14 @@ import scipy as sp import shapely.prepared import shapely.wkt -from _helpers import configure_logging, create_logger, read_csv_nafix +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path_size, + mock_snakemake, + read_csv_nafix, +) from shapely.ops import unary_union logger = create_logger(__name__) @@ -202,7 +208,7 @@ def _load_lines_from_osm(fp_osm_lines): # TODO Seems to be not needed anymore def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config): # the links file can be empty - if os.path.getsize(fp_osm_converters) == 0: + if get_path_size(fp_osm_converters) == 0: links = pd.DataFrame() return links @@ -231,7 +237,7 @@ def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config def _load_converters_from_osm(fp_osm_converters, buses): # the links file can be empty - if os.path.getsize(fp_osm_converters) == 0: + if get_path_size(fp_osm_converters) == 0: converters = pd.DataFrame() return converters @@ -554,11 +560,9 @@ def base_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) - + change_to_script_dir(__file__) snakemake = mock_snakemake("base_network") + configure_logging(snakemake) inputs = snakemake.input diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index d1e4f5e3c..9af7f1be9 100644 --- a/scripts/build_bus_regions.py 
+++ b/scripts/build_bus_regions.py @@ -42,12 +42,20 @@ Description ----------- """ -import os import geopandas as gpd +import numpy as np import pandas as pd import pypsa -from _helpers import REGION_COLS, configure_logging, create_logger +from _helpers import ( + REGION_COLS, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) +from scipy.spatial import Voronoi +from shapely.geometry import Polygon logger = create_logger(__name__) @@ -67,14 +75,8 @@ def custom_voronoi_partition_pts(points, outline, add_bounds_shape=True, multipl polygons : N - ndarray[dtype=Polygon|MultiPolygon] """ - import numpy as np - from scipy.spatial import Voronoi - from shapely.geometry import Polygon - points = np.asarray(points) - polygons_arr = [] - if len(points) == 1: polygons_arr = [outline] else: @@ -148,10 +150,9 @@ def get_gadm_shape( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_bus_regions") + configure_logging(snakemake) countries = snakemake.params.countries diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 06e5a24cd..186e52ab6 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -93,22 +93,25 @@ ----------- """ -import os import atlite import geopandas as gpd import pandas as pd -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) logger = create_logger(__name__) if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5") + configure_logging(snakemake) cutout_params = snakemake.params.cutouts[snakemake.wildcards.cutout] diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index c5dad677b..747c90f84 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -40,7 +40,7 @@ Then with a function that takes in the PyPSA network "base.nc", region and gadm shape data, the countries of interest, a scale factor, and the snapshots, it returns a csv file called "demand_profiles.csv", that allocates the load to the buses of the network according to GDP and population. 
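A toy illustration of the GDP/population allocation idea summarised above; the blend weights, bus names and load values are made up and do not reproduce the script's actual method:

import pandas as pd

country_load = pd.Series([100.0, 120.0], index=pd.Index([0, 1], name="snapshot"))
gdp_share = pd.Series({"bus0": 0.7, "bus1": 0.3})
pop_share = pd.Series({"bus0": 0.5, "bus1": 0.5})
key = 0.6 * gdp_share + 0.4 * pop_share   # hypothetical blend of the two keys, sums to 1
bus_load = pd.DataFrame(
    country_load.values[:, None] * key.values,
    index=country_load.index, columns=key.index,
)
# Each row of bus_load sums back to the country load for that snapshot.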
""" -import os + from itertools import product import geopandas as gpd @@ -49,7 +49,15 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from _helpers import configure_logging, create_logger, read_osm_config +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, + read_osm_config, + sets_path_to_root, +) from shapely.prepared import prep from shapely.validation import make_valid @@ -108,7 +116,7 @@ def get_load_paths_gegis(ssp_parentfolder, config): load_paths = [] for continent in region_load: - load_path = os.path.join( + load_path = get_path( ssp_parentfolder, str(ssp), str(prediction_year), @@ -244,11 +252,10 @@ def upsample(cntry, group): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_demand_profiles") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index 9593f7767..2cea6f681 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -45,14 +45,23 @@ The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`. """ import os +import pathlib import atlite import geopandas as gpd import numpy as np +import pandas as pd import rasterio as rio -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, +) from rasterio.features import geometry_mask from rasterio.warp import transform_bounds +from shapely.ops import unary_union logger = create_logger(__name__) @@ -65,14 +74,16 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)): list_fileshapes = [] for lf in list_paths: - if os.path.isdir(lf): # if it is a folder, then list all shapes files contained + if pathlib.Path( + lf + ).is_dir(): # if it is a folder, then list all shapes files contained # loop over all dirs and subdirs for path, subdirs, files in os.walk(lf): # loop over all files for subfile in files: # add the subfile if it is a shape file if subfile.endswith(accepted_formats): - list_fileshapes.append(os.path.join(path, subfile)) + list_fileshapes.append(str(get_path(path, subfile))) elif lf.endswith(accepted_formats): list_fileshapes.append(lf) @@ -119,9 +130,6 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): ------- unified_shape : GeoDataFrame with a unified "multishape" """ - import pandas as pd - from shapely.ops import unary_union - from shapely.validation import make_valid if out_logging: logger.info("Stage 3/5: Unify protected shape area.") @@ -176,12 +184,11 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"] ) + configure_logging(snakemake) # get crs diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 1ebef49e9..1ab20b70a 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -5,14 +5,15 @@ # -*- coding: utf-8 -*- -import os - import geopandas as gpd import numpy as np import 
pandas as pd from _helpers import ( + build_directory, + change_to_script_dir, configure_logging, create_logger, + mock_snakemake, read_geojson, read_osm_config, sets_path_to_root, @@ -408,7 +409,6 @@ def connect_stations_same_station_id(lines, buses): station_id_list = buses.station_id.unique() add_lines = [] - from shapely.geometry import LineString for s_id in station_id_list: buses_station_id = buses[buses.station_id == s_id] @@ -875,16 +875,14 @@ def built_network( logger.info("Save outputs") # create clean directory if not already exist - if not os.path.exists(outputs["lines"]): - os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True) + build_directory(outputs["lines"]) to_csv_nafix(lines, outputs["lines"]) # Generate CSV to_csv_nafix(converters, outputs["converters"]) # Generate CSV to_csv_nafix(transformers, outputs["transformers"]) # Generate CSV # create clean directory if not already exist - if not os.path.exists(outputs["substations"]): - os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True) + build_directory(outputs["substations"]) # Generate CSV to_csv_nafix(buses, outputs["substations"]) @@ -893,10 +891,9 @@ def built_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_osm_network") + configure_logging(snakemake) # load default crs diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 71b701c1b..e1f8e91b7 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -100,8 +100,6 @@ 4. OSM extraction was supposed to be ignoring non-generation features like CHP and Natural Gas storage (in contrast to PPM). """ -import os - import geopandas as gpd import numpy as np import pandas as pd @@ -109,8 +107,13 @@ import pypsa import yaml from _helpers import ( + change_to_script_dir, configure_logging, create_logger, + get_current_directory_path, + get_path, + get_path_size, + mock_snakemake, read_csv_nafix, to_csv_nafix, two_digits_2_name_country, @@ -122,7 +125,7 @@ def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm): - if os.stat(filepath_ppl_osm).st_size == 0: + if get_path_size(filepath_ppl_osm) == 0: return to_csv_nafix(pd.DataFrame(), filepath_ppl_pm, index=False) add_ppls = read_csv_nafix(filepath_ppl_osm, index_col=0, dtype={"bus": "str"}) @@ -296,9 +299,7 @@ def replace_natural_gas_technology(df: pd.DataFrame): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_powerplants") configure_logging(snakemake) @@ -325,8 +326,8 @@ def replace_natural_gas_technology(df: pd.DataFrame): "Please check file configs/powerplantmatching_config.yaml" ) logger.info("Parsing OSM generator data to powerplantmatching format") - config["EXTERNAL_DATABASE"]["fn"] = os.path.join( - os.getcwd(), filepath_osm2pm_ppl + config["EXTERNAL_DATABASE"]["fn"] = get_path( + get_current_directory_path(), filepath_osm2pm_ppl ) else: # create an empty file diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index eb79f5752..0ec3f3e3a 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -191,7 +191,6 @@ reached. 
""" import functools -import os import time from math import isnan @@ -202,7 +201,13 @@ import pandas as pd import progressbar as pgb import xarray as xr -from _helpers import configure_logging, create_logger, sets_path_to_root +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, + sets_path_to_root, +) from add_electricity import load_powerplants from dask.distributed import Client, LocalCluster from pypsa.geo import haversine @@ -486,11 +491,10 @@ def create_scaling_factor( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_renewable_profiles", technology="solar") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) pgb.streams.wrap_stderr() diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index cc883ef9e..f6b4d5874 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -6,7 +6,7 @@ # -*- coding: utf-8 -*- import multiprocessing as mp -import os +import pathlib import shutil from itertools import takewhile from operator import attrgetter @@ -19,8 +19,13 @@ import requests import xarray as xr from _helpers import ( + build_directory, + change_to_script_dir, configure_logging, create_logger, + get_current_directory_path, + get_path, + mock_snakemake, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, @@ -88,21 +93,21 @@ def download_GADM(country_code, update=False, out_logging=False): GADM_filename = get_GADM_filename(country_code) GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg" - GADM_inputfile_gpkg = os.path.join( - os.getcwd(), + GADM_inputfile_gpkg = get_path( + get_current_directory_path(), "data", "gadm", GADM_filename, GADM_filename + ".gpkg", ) # Input filepath gpkg - if not os.path.exists(GADM_inputfile_gpkg) or update is True: + if not pathlib.Path(GADM_inputfile_gpkg).exists() or update is True: if out_logging: logger.warning( f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" ) # create data/osm directory - os.makedirs(os.path.dirname(GADM_inputfile_gpkg), exist_ok=True) + build_directory(GADM_inputfile_gpkg) try: r = requests.get(GADM_url, stream=True, timeout=300) @@ -296,8 +301,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0 def save_to_geojson(df, fn): - if os.path.exists(fn): - os.unlink(fn) # remove file if it exists + pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists if not isinstance(df, gpd.GeoDataFrame): df = gpd.GeoDataFrame(dict(geometry=df)) @@ -319,9 +323,9 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"): The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. 
""" - if not os.path.exists(EEZ_gpkg): + if not pathlib.Path(EEZ_gpkg).exists(): raise Exception( - f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {os.path.dirname(EEZ_gpkg)}" + f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(EEZ_gpkg).parent}" ) geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs) @@ -479,17 +483,17 @@ def download_WorldPop_standard( f"https://data.worldpop.org/GIS/Population/Global_2000_2020_Constrained/2020/maxar_v1/{two_2_three_digits_country(country_code).upper()}/{WorldPop_filename}", ] - WorldPop_inputfile = os.path.join( - os.getcwd(), "data", "WorldPop", WorldPop_filename + WorldPop_inputfile = get_path( + get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - if not os.path.exists(WorldPop_inputfile) or update is True: + if not pathlib.Path(WorldPop_inputfile).exists() or update is True: if out_logging: logger.warning( f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}" ) # create data/osm directory - os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True) + build_directory(WorldPop_inputfile) loaded = False for WorldPop_url in WorldPop_urls: @@ -533,10 +537,10 @@ def download_WorldPop_API( WorldPop_filename = f"{two_2_three_digits_country(country_code).lower()}_ppp_{year}_UNadj_constrained.tif" # Request to get the file - WorldPop_inputfile = os.path.join( - os.getcwd(), "data", "WorldPop", WorldPop_filename + WorldPop_inputfile = get_path( + get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True) + build_directory(WorldPop_inputfile) year_api = int(str(year)[2:]) loaded = False WorldPop_api_urls = [ @@ -571,17 +575,19 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False): name_file_tif = name_file_nc[:-2] + "tif" # path of the nc file - GDP_nc = os.path.join(os.getcwd(), "data", "GDP", name_file_nc) # Input filepath nc + GDP_nc = get_path( + get_current_directory_path(), "data", "GDP", name_file_nc + ) # Input filepath nc # path of the tif file - GDP_tif = os.path.join( - os.getcwd(), "data", "GDP", name_file_tif + GDP_tif = get_path( + get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath nc # Check if file exists, otherwise throw exception - if not os.path.exists(GDP_nc): + if not pathlib.Path(GDP_nc).exists(): raise Exception( - f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {os.path.dirname(GDP_nc)}" + f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}" ) # open nc dataset @@ -620,11 +626,11 @@ def load_GDP( # path of the nc file name_file_tif = name_file_nc[:-2] + "tif" - GDP_tif = os.path.join( - os.getcwd(), "data", "GDP", name_file_tif + GDP_tif = get_path( + get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath tif - if update | (not os.path.exists(GDP_tif)): + if update | (not pathlib.Path(GDP_tif).exists()): if out_logging: logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" @@ -1308,11 +1314,10 @@ def gadm( 
if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) out = snakemake.output diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index 349a1ef00..0dec51ae0 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -14,9 +14,13 @@ """ import collections.abc import copy -import os -from pathlib import Path +from _helpers import ( + change_to_script_dir, + get_current_directory_path, + get_path, + mock_snakemake, +) from ruamel.yaml import YAML @@ -37,7 +41,7 @@ def _parse_inputconfig(input_config, yaml): return input_config if isinstance(input_config, str): - input_config = Path(Path.cwd(), input_config) + input_config = get_path(get_current_directory_path(), input_config) with open(input_config) as fp: return yaml.load(fp) @@ -76,7 +80,7 @@ def create_test_config(default_config, diff_config, output_path): # Output path if isinstance(output_path, str): - output_path = Path(Path.cwd(), output_path) + output_path = get_path(get_current_directory_path(), output_path) # Save file yaml.dump(merged_config, output_path) @@ -86,9 +90,7 @@ def create_test_config(default_config, diff_config, output_path): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("build_test_configs") # Input paths diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 01b535454..fc5edbb69 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -5,16 +5,17 @@ # -*- coding: utf-8 -*- -import os - import geopandas as gpd import numpy as np import pandas as pd import reverse_geocode as rg from _helpers import ( REGION_COLS, + change_to_script_dir, configure_logging, create_logger, + get_path_size, + mock_snakemake, save_to_geojson, to_csv_nafix, ) @@ -902,7 +903,7 @@ def clean_data( ): logger.info("Process OSM lines") - if os.path.getsize(input_files["lines"]) > 0: + if get_path_size(input_files["lines"]) > 0: # Load raw data lines df_lines = load_network_data("lines", data_options) @@ -917,7 +918,7 @@ def clean_data( df_all_lines = df_lines # load cables only if data are stored - if os.path.getsize(input_files["cables"]) > 0: + if get_path_size(input_files["cables"]) > 0: logger.info("Add OSM cables to data") # Load raw data lines df_cables = load_network_data("cables", data_options) @@ -967,7 +968,7 @@ def clean_data( logger.info("Process OSM substations") - if os.path.getsize(input_files["substations"]) > 0: + if get_path_size(input_files["substations"]) > 0: df_all_substations = load_network_data("substations", data_options) # prepare dataset for substations @@ -1027,7 +1028,7 @@ def clean_data( logger.info("Process OSM generators") - if os.path.getsize(input_files["generators"]) > 0: + if get_path_size(input_files["generators"]) > 0: df_all_generators = gpd.read_file(input_files["generators"]) # prepare the generator dataset @@ -1062,10 +1063,9 @@ def clean_data( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("clean_osm_data") + configure_logging(snakemake) tag_substation = 
snakemake.params.clean_osm_data_options["tag_substation"] diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 34b116a99..de7d538fc 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -121,7 +121,7 @@ :align: center """ -import os +import pathlib from functools import reduce import geopandas as gpd @@ -131,9 +131,11 @@ import pypsa from _helpers import ( REGION_COLS, + change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, + mock_snakemake, sets_path_to_root, update_p_nom_max, ) @@ -145,6 +147,7 @@ busmap_by_kmeans, get_clustering_from_busmap, ) +from scipy.sparse import csgraph from shapely.geometry import Point idx = pd.IndexSlice @@ -428,7 +431,6 @@ def busmap_for_n_clusters( algorithm_kwds.setdefault("random_state", 0) def fix_country_assignment_for_hac(n): - from scipy.sparse import csgraph # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): @@ -633,8 +635,7 @@ def clustering_for_n_clusters( def save_to_geojson(s, fn): - if os.path.exists(fn): - os.unlink(fn) + pathlib.Path(fn).unlink(missing_ok=True) df = s.reset_index() schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"} df.to_file(fn, driver="GeoJSON", schema=schema) @@ -656,13 +657,12 @@ def cluster_regions(busmaps, inputs, output): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "cluster_network", network="elec", simpl="", clusters="min" ) sets_path_to_root("pypsa-earth") + configure_logging(snakemake) inputs, outputs, config = snakemake.input, snakemake.output, snakemake.config diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index ec99baecd..2a1e366d2 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,11 +26,24 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. 
cable/lines) aggregated power data as .geojson and .csv files """ -import os +import pathlib  import shutil -from pathlib import Path  -from _helpers import configure_logging, create_logger, read_osm_config +from _helpers import ( +    change_to_script_dir, +    configure_logging, +    create_logger, +    get_current_directory_path, +    get_path, +    mock_snakemake, +    read_osm_config, +    sets_path_to_root, +) from earth_osm import eo  logger = create_logger(__name__) @@ -92,17 +105,18 @@ def convert_iso_to_geofk(   if __name__ == "__main__":     if "snakemake" not in globals(): -        from _helpers import mock_snakemake, sets_path_to_root - -        os.chdir(os.path.dirname(os.path.abspath(__file__))) +        change_to_script_dir(__file__)         snakemake = mock_snakemake("download_osm_data")         sets_path_to_root("pypsa-earth") +        configure_logging(snakemake)      run = snakemake.config.get("run", {})     RDIR = run["name"] + "/" if run.get("name") else ""  -    store_path_resources = Path.joinpath(Path().cwd(), "resources", RDIR, "osm", "raw") -    store_path_data = Path.joinpath(Path().cwd(), "data", "osm") +    store_path_resources = get_path( +        get_current_directory_path(), "resources", RDIR, "osm", "raw" +    ) +    store_path_data = get_path(get_current_directory_path(), "data", "osm")      country_list = country_list_to_geofk(snakemake.params.countries)      eo.save_osm_data( @@ -117,10 +131,9 @@ def convert_iso_to_geofk(     out_aggregate=True,     ) -    out_path = Path.joinpath(store_path_resources, "out") +    out_path = get_path(store_path_resources, "out")     names = ["generator", "cable", "line", "substation"]     out_formats = ["csv", "geojson"] -    new_files = os.listdir(out_path)  # list downloaded osm files      # earth-osm (eo) only outputs files with content     # If the file is empty, it is not created @@ -129,9 +142,9 @@ def convert_iso_to_geofk(     # Rename and move osm files to the resources folder output     for name in names:         for f in out_formats: -            new_file_name = Path.joinpath(store_path_resources, f"all_raw_{name}s.{f}") -            old_files = list(Path(out_path).glob(f"*{name}.{f}")) -            # if file is missing, create empty file, otherwise rename it an move it +            new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}") +            old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}")) +            # if file is missing, create empty file, otherwise rename it and move it             if not old_files:                 with open(new_file_name, "w") as f:                     pass diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index 5c544b61a..18a2e3a23 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -23,15 +23,24 @@ ------- This rule creates a dataframe containing in the columns the relevant statistics for the current run.
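The hunks above, and most of the scripts below, swap direct os/os.path calls for small helpers imported from scripts/_helpers.py (get_path, get_current_directory_path, get_path_size, build_directory, change_to_script_dir). Their definitions are not part of this excerpt; the following is only a plausible pathlib-based sketch of what they do, inferred from the call sites and from the behaviour checked in test/test_helpers.py further down, so the exact signatures should be treated as assumptions rather than the shipped implementation.

    import os
    import pathlib


    def get_path(*args):
        # assumed drop-in for os.path.join(...), returning a pathlib.Path
        return pathlib.Path(*args)


    def get_current_directory_path():
        # assumed drop-in for os.getcwd(), returning a pathlib.Path
        return pathlib.Path.cwd()


    def get_path_size(path):
        # assumed drop-in for os.path.getsize(path): size in bytes
        return pathlib.Path(path).stat().st_size


    def build_directory(path, just_parent_directory=True):
        # just_parent_directory=True  ~ os.makedirs(os.path.dirname(path), exist_ok=True)
        # just_parent_directory=False ~ os.makedirs(path, exist_ok=True)
        target = pathlib.Path(path)
        if just_parent_directory:
            target.parent.mkdir(parents=True, exist_ok=True)
        else:
            target.mkdir(parents=True, exist_ok=True)


    def change_to_script_dir(path):
        # assumed equivalent of os.chdir(os.path.dirname(os.path.abspath(path)))
        os.chdir(pathlib.Path(path).absolute().parent)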
""" -import os -from pathlib import Path + +import pathlib import geopandas as gpd import numpy as np import pandas as pd import pypsa import xarray as xr -from _helpers import create_logger, mock_snakemake, sets_path_to_root, to_csv_nafix +from _helpers import ( + change_to_script_dir, + create_country_list, + create_logger, + get_path_size, + mock_snakemake, + sets_path_to_root, + three_2_two_digits_country, + to_csv_nafix, +) from build_test_configs import create_test_config from shapely.validation import make_valid @@ -43,7 +52,7 @@ def _multi_index_scen(rulename, keys): def _mock_snakemake(rule, **kwargs): - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake(rule, **kwargs) sets_path_to_root("pypsa-earth") return snakemake @@ -71,7 +80,6 @@ def generate_scenario_by_country( out_dir : str (optional) Output directory where output configuration files are executed """ - from _helpers import create_country_list, three_2_two_digits_country clean_country_list = create_country_list(country_list) @@ -123,7 +131,7 @@ def collect_basic_osm_stats(path, rulename, header): """ Collect basic statistics on OSM data: number of items """ - if Path(path).is_file() and Path(path).stat().st_size > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) @@ -142,7 +150,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"): - length of the stored shapes - length of objects with tag_frequency == 0 (DC elements) """ - if Path(path).is_file() and Path(path).stat().st_size > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) obj_length = ( @@ -244,7 +252,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"): df = pd.DataFrame() - if Path(fp_onshore).is_file() and Path(fp_offshore).is_file(): + if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file(): gdf_onshore = gpd.read_file(fp_onshore) gdf_offshore = gpd.read_file(fp_offshore) @@ -286,7 +294,7 @@ def capacity_stats(df): else: return df.groupby("carrier").p_nom.sum().astype(float) - if Path(network_path).is_file(): + if pathlib.Path(network_path).is_file(): n = pypsa.Network(network_path) lines_length = float((n.lines.length * n.lines.num_parallel).sum()) @@ -341,7 +349,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): """ snakemake = _mock_snakemake(rulename) - if not Path(snakemake.output.africa_shape).is_file(): + if not pathlib.Path(snakemake.output.africa_shape).is_file(): return pd.DataFrame() df_continent = gpd.read_file(snakemake.output.africa_shape) @@ -352,7 +360,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): .geometry.area.iloc[0] ) - if not Path(snakemake.output.gadm_shapes).is_file(): + if not pathlib.Path(snakemake.output.gadm_shapes).is_file(): return pd.DataFrame() df_gadm = gpd.read_file(snakemake.output.gadm_shapes) @@ -466,7 +474,7 @@ def collect_renewable_stats(rulename, technology): """ snakemake = _mock_snakemake(rulename, technology=technology) - if Path(snakemake.output.profile).is_file(): + if pathlib.Path(snakemake.output.profile).is_file(): res = xr.open_dataset(snakemake.output.profile) if technology == "hydro": @@ -499,7 +507,7 @@ def add_computational_stats(df, snakemake, column_name=None): comp_data = [np.nan] * 3 # total_time, mean_load and max_memory if snakemake.benchmark: - if not Path(snakemake.benchmark).is_file(): + if not 
pathlib.Path(snakemake.benchmark).is_file(): return df bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t") @@ -579,9 +587,7 @@ def calculate_stats( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("make_statistics") sets_path_to_root("pypsa-earth") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 583766ac4..5ad6c5bfc 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -51,12 +51,20 @@ Replacing *summaries* with *plots* creates nice colored maps of the results. """ -import os + +import pathlib import pandas as pd import pypsa -from _helpers import configure_logging -from add_electricity import create_logger, load_costs, update_transmission_costs +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, +) +from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -496,7 +504,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all for label, filename in networks_dict.items(): print(label, filename) - if not os.path.exists(filename): + if not pathlib.Path(filename).exists(): print("does not exist!!") continue @@ -527,16 +535,14 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all def to_csv(dfs, dir): - os.makedirs(dir, exist_ok=True) + build_directory(dir, just_parent_directory=False) for key, df in dfs.items(): - df.to_csv(os.path.join(dir, f"{key}.csv")) + df.to_csv(get_path(dir, f"{key}.csv")) if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "make_summary", simpl="", @@ -551,9 +557,9 @@ def to_csv(dfs, dir): scenario_name = snakemake.config.get("run", {}).get("name", "") if scenario_name: - network_dir = os.path.join(network_dir, "results", scenario_name, "networks") + network_dir = get_path(network_dir, "results", scenario_name, "networks") else: - network_dir = os.path.join(network_dir, "results", "networks") + network_dir = get_path(network_dir, "results", "networks") configure_logging(snakemake) @@ -569,7 +575,7 @@ def expand_from_wildcard(key): ll = [snakemake.wildcards.ll] networks_dict = { - (simpl, clusters, l, opts): os.path.join( + (simpl, clusters, l, opts): get_path( network_dir, f"elec_s{simpl}_" f"{clusters}_ec_l{l}_{opts}.nc" ) for simpl in expand_from_wildcard("simpl") diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index a448d142b..9ba7d4948 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -17,7 +17,7 @@ add_to_snakefile: false # When set to true, enables Monte Carlo sampling samples: 9 # number of optimizations. Note that number of samples when using scipy has to be the square of a prime number sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy", packages that are supported - seed: 42 # set seedling for reproducibilty + seed: 42 # set seedling for reproducibility uncertainties: loads_t.p_set: type: uniform @@ -67,17 +67,16 @@ wildcard {unc}, which is described in the config.yaml and created in the Snakefile as a range from 0 to (total number of) SAMPLES. 
""" -import os import chaospy import numpy as np import pandas as pd import pypsa import seaborn as sns -from _helpers import configure_logging, create_logger +from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang -from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import MinMaxScaler, minmax_scale from solve_network import * logger = create_logger(__name__) @@ -100,8 +99,6 @@ def monte_carlo_sampling_pydoe2( Adapted from Disspaset: https://github.com/energy-modelling-toolkit/Dispa-SET/blob/master/scripts/build_and_run_hypercube.py Documentation on PyDOE2: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) """ - from pyDOE2 import lhs - from scipy.stats import qmc # Generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: lh = lhs( @@ -135,8 +132,6 @@ def monte_carlo_sampling_chaospy( Documentation on Chaospy: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples """ - import chaospy - from scipy.stats import qmc # generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: N_FEATURES = "chaospy.Uniform(0, 1), " * N_FEATURES @@ -178,7 +173,6 @@ def monte_carlo_sampling_scipy( Documentation for Latin Hypercube: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html#scipy.stats.qmc.LatinHypercube Orthogonal LHS is better than basic LHS: https://github.com/scipy/scipy/pull/14546/files, https://en.wikipedia.org/wiki/Latin_hypercube_sampling """ - from scipy.stats import qmc sampler = qmc.LatinHypercube( d=N_FEATURES, @@ -232,36 +226,37 @@ def rescale_distribution( - The function supports rescaling for uniform, normal, lognormal, triangle, beta, and gamma distributions. - The rescaled samples will have values in the range [0, 1]. 
""" - from scipy.stats import beta, gamma, lognorm, norm, qmc, triang - from sklearn.preprocessing import MinMaxScaler, minmax_scale for idx, value in enumerate(uncertainties_values): dist = value.get("type") params = value.get("args") - match dist: - case "uniform": - l_bounds, u_bounds = params - latin_hypercube[:, idx] = minmax_scale( - latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) - ) - case "normal": - mean, std = params - latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) - case "lognormal": - shape = params[0] - latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) - case "triangle": - mid_point = params[0] - latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) - case "beta": - a, b = params - latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) - case "gamma": - shape, scale = params - latin_hypercube[:, idx] = gamma.ppf( - latin_hypercube[:, idx], shape, scale - ) + if dist == "uniform": + l_bounds, u_bounds = params + latin_hypercube[:, idx] = minmax_scale( + latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) + ) + elif dist == "normal": + mean, std = params + latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) + elif dist == "lognormal": + shape = params[0] + latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) + elif dist == "triangle": + mid_point = params[0] + latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) + elif dist == "beta": + a, b = params + latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) + elif dist == "gamma": + shape, scale = params + latin_hypercube[:, idx] = gamma.ppf(latin_hypercube[:, idx], shape, scale) + else: + exception_message = ( + f"The value {dist} is not among the allowed ones: uniform, normal, lognormal, " + f"triangle, beta, gamma" + ) + raise NotImplementedError(exception_message) # samples space needs to be from 0 to 1 mm = MinMaxScaler(feature_range=(0, 1), clip=True) @@ -348,9 +343,7 @@ def validate_parameters( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "monte_carlo", simpl="", diff --git a/scripts/non_workflow/zip_folder.py b/scripts/non_workflow/zip_folder.py index 0bac2de21..63dbddf24 100644 --- a/scripts/non_workflow/zip_folder.py +++ b/scripts/non_workflow/zip_folder.py @@ -8,9 +8,8 @@ Module to zip the desired folders to be stored in google drive, or equivalent. 
""" import os +import pathlib import zipfile -from os.path import basename -from xml.etree.ElementInclude import include from _helpers import sets_path_to_root @@ -25,7 +24,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True): for filename in filenames: if filter(filename): # create complete filepath of file in directory - filePath = os.path.join(folderName, filename) + filePath = str(pathlib.Path(folderName, filename)) # path of the zip file if include_parent: @@ -41,7 +40,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True): if __name__ == "__main__": # Set path to this file - os.chdir(os.path.dirname(os.path.abspath(__file__))) + os.chdir(pathlib.Path(__file__).parent.absolute()) # Required to set path to pypsa-earth sets_path_to_root("pypsa-earth") diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 8f2763509..07c0115e4 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -17,21 +17,22 @@ ----------- """ -import os - import cartopy.crs as ccrs import geopandas as gpd import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd +import pypsa from _helpers import ( aggregate_costs, aggregate_p, + change_to_script_dir, configure_logging, create_logger, - load_network_for_plots, + mock_snakemake, ) +from add_electricity import load_costs, update_transmission_costs from matplotlib.legend_handler import HandlerPatch from matplotlib.patches import Circle, Ellipse @@ -356,11 +357,43 @@ def split_costs(n): ax.grid(True, axis="y", color="k", linestyle="dotted") +def load_network_for_plots( + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True +): + + n = pypsa.Network(fn) + + n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" + n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) + + n.links["carrier"] = ( + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + ) + n.lines["carrier"] = "AC line" + n.transformers["carrier"] = "AC transformer" + + n.lines["s_nom"] = n.lines["s_nom_min"] + n.links["p_nom"] = n.links["p_nom_min"] + + if combine_hydro_ps: + n.storage_units.loc[ + n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" + ] = "hydro+PHS" + + # if the carrier was not set on the heat storage units + # bus_carrier = n.storage_units.bus.map(n.buses.carrier) + # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" + + Nyears = n.snapshot_weightings.objective.sum() / 8760.0 + costs = load_costs(tech_costs, cost_config, elec_config, Nyears) + update_transmission_costs(n, costs) + + return n + + if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_network", network="elec", diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index 1491b6692..d89ef53ec 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -16,11 +16,16 @@ Description ----------- """ -import os import matplotlib.pyplot as plt import pandas as pd -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, +) logger = create_logger(__name__) @@ -217,9 +222,8 @@ def plot_energy(infn, snmk, fn=None): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + 
change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_summary", summary="energy", @@ -241,7 +245,7 @@ def plot_energy(infn, snmk, fn=None): logger.error(f"plotting function for {summary} has not been defined") func( - os.path.join(snakemake.input[0], f"{summary}.csv"), + get_path(snakemake.input[0], f"{summary}.csv"), snakemake, snakemake.output[0], ) diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 3b92cd31d..59c34ea3a 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -56,7 +56,7 @@ for all ``scenario`` s in the configuration file the rule :mod:`prepare_network`. """ -import os +import pathlib import re from zipfile import ZipFile @@ -65,7 +65,14 @@ import pandas as pd import pypsa import requests -from _helpers import configure_logging, create_logger +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_current_directory_path, + get_path, + mock_snakemake, +) from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -87,13 +94,14 @@ def download_emission_data(): with requests.get(url) as rq: with open("data/co2.zip", "wb") as file: file.write(rq.content) - rootpath = os.getcwd() - file_path = os.path.join(rootpath, "data/co2.zip") + root_path = get_current_directory_path() + file_path = get_path(root_path, "data/co2.zip") with ZipFile(file_path, "r") as zipObj: zipObj.extract( - "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", rootpath + "/data" + "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", + get_path(root_path, "data"), ) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" except: logger.error(f"Failed download resource from '{url}'.") @@ -120,7 +128,7 @@ def emission_extractor(filename, emission_year, country_names): """ # data reading process - datapath = os.path.join(os.getcwd(), "data", filename) + datapath = get_path(get_current_directory_path(), "data", filename) df = pd.read_excel(datapath, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) df.columns = df.iloc[0] df = df.set_index("Country_code_A3") @@ -317,9 +325,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "prepare_network", simpl="", diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 1583cc245..d425a781d 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -81,7 +81,7 @@ """ import datetime as dt -import os +import pathlib import re from zipfile import ZipFile @@ -89,9 +89,14 @@ import pandas as pd import yaml from _helpers import ( + change_to_script_dir, configure_logging, create_country_list, create_logger, + get_current_directory_path, + get_path, + get_relative_path, + mock_snakemake, progress_retrieve, sets_path_to_root, ) @@ -119,9 +124,9 @@ def load_databundle_config(config): return config -def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_zenodo(config, rootpath, dest_path, hot_run=True, + download_and_unzip_zenodo(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data from zenodo 
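get_relative_path, added to the _helpers import block above and used for the destination handling in the download functions below, is likewise not defined in this excerpt. A sketch consistent with test_get_relative_path in test/test_helpers.py; the default for the start argument is an assumption:

    import os
    import pathlib


    def get_relative_path(path, start=None):
        # assumed thin wrapper around os.path.relpath returning a pathlib.Path;
        # Path.relative_to() alone would raise when path is not nested inside start
        if start is None:
            start = pathlib.Path.cwd()
        return pathlib.Path(os.path.relpath(path, start=start))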
@@ -130,7 +135,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -143,8 +148,8 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["zenodo"] if hot_run: @@ -155,7 +160,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F with ZipFile(file_path, "r") as zipObj: # Extract all the contents of zip file in current directory zipObj.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") except: logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -164,9 +169,9 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F return True -def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_gdrive(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_gdrive(config, rootpath, dest_path, hot_run=True, + download_and_unzip_gdrive(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data from google drive @@ -175,7 +180,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -188,8 +193,8 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["gdrive"] # retrieve file_id from path @@ -216,8 +221,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F # if hot run enabled if hot_run: # remove file - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) # download file from google drive gdd.download_file_from_google_drive( file_id=file_id, @@ -238,10 +242,10 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F def download_and_unzip_protectedplanet( - config, rootpath, attempts=3, hot_run=True, disable_progress=False + config, root_path, attempts=3, hot_run=True, disable_progress=False ): """ - download_and_unzip_protectedplanet(config, rootpath, dest_path, + download_and_unzip_protectedplanet(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data by category from protectedplanet @@ -250,7 +254,7 @@ def download_and_unzip_protectedplanet( ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the 
repository attempts : int (default 3) Number of attempts to download the data by month. @@ -266,8 +270,8 @@ def download_and_unzip_protectedplanet( True when download is successful, False otherwise """ resource = config["category"] - file_path = os.path.join(rootpath, "tempfile_wpda.zip") - destination = os.path.relpath(config["destination"]) + file_path = get_path(root_path, "tempfile_wpda.zip") + destination = get_relative_path(config["destination"]) url = config["urls"]["protectedplanet"] def get_first_day_of_month(date): @@ -282,8 +286,7 @@ def get_first_day_of_previous_month(date): ) if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) downloaded = False @@ -320,17 +323,17 @@ def get_first_day_of_previous_month(date): for fzip in zip_files: # final path of the file try: - inner_zipname = os.path.join(destination, fzip) + inner_zipname = get_path(destination, fzip) zip_obj.extract(fzip, path=destination) - dest_nested = os.path.join(destination, fzip.split(".")[0]) + dest_nested = get_path(destination, fzip.split(".")[0]) with ZipFile(inner_zipname, "r") as nested_zip: nested_zip.extractall(path=dest_nested) # remove inner zip file - os.remove(inner_zipname) + pathlib.Path(inner_zipname).unlink(missing_ok=True) logger.info(f"{resource} - Successfully unzipped file '{fzip}'") except: @@ -340,7 +343,7 @@ def get_first_day_of_previous_month(date): # close and remove outer zip file zip_obj.close() - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info( f"Downloaded resource '{resource_iter}' from cloud '{url_iter}'." @@ -391,8 +394,7 @@ def download_and_unpack( True when download is successful, False otherwise """ if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -404,9 +406,9 @@ def download_and_unpack( # then unzip it and remove the original file if unzip: with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(destination) + zipfile.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") return True except: @@ -414,9 +416,9 @@ def download_and_unpack( return False -def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_direct(config, rootpath, dest_path, hot_run=True, + download_and_unzip_direct(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a direct url with no processing. 
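Throughout these download helpers the old guard-then-remove idiom is collapsed into a single call. The two spellings are equivalent, except that missing_ok requires Python >= 3.8 and the new form avoids the small race between the existence check and the removal; remove_if_exists below is only an illustrative name, the change set inlines the call.

    import pathlib


    def remove_if_exists(file_path):
        # same effect as: if os.path.exists(file_path): os.remove(file_path)
        pathlib.Path(file_path).unlink(missing_ok=True)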
@@ -426,7 +428,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -439,10 +441,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url = config["urls"]["direct"] - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, pathlib.Path(url).name) unzip = config.get("unzip", False) @@ -457,10 +459,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F def download_and_unzip_hydrobasins( - config, rootpath, hot_run=True, disable_progress=False + config, root_path, hot_run=True, disable_progress=False ): """ - download_and_unzip_basins(config, rootpath, dest_path, hot_run=True, + download_and_unzip_basins(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data for hydrobasins from HydroBASINS database @@ -480,7 +482,7 @@ def download_and_unzip_hydrobasins( ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -493,7 +495,7 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url_templ = config["urls"]["hydrobasins"]["base_url"] suffix_list = config["urls"]["hydrobasins"]["suffixes"] @@ -504,7 +506,7 @@ def download_and_unzip_hydrobasins( for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, pathlib.Path(url).name) all_downloaded &= download_and_unpack( url=url, @@ -520,9 +522,9 @@ def download_and_unzip_hydrobasins( return all_downloaded -def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_post(config, rootpath, dest_path, hot_run=True, + download_and_unzip_post(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a post request. 
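Deriving the local file name from a download URL now uses pathlib.Path(url).name instead of os.path.basename(url); only the last path component is kept, for example:

    import pathlib

    # illustrative URL, not one of the configured data sources
    url = "https://example.com/some/dir/archive.zip"
    file_name = pathlib.Path(url).name  # -> "archive.zip"

A trailing query string would end up in .name, but that was equally true of os.path.basename, so behaviour is unchanged.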
@@ -531,7 +533,7 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -544,18 +546,17 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) # load data for post method postdata = config["urls"]["post"] # remove url feature url = postdata.pop("url") - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, pathlib.Path(url).name) if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) # try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -571,9 +572,9 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal # then unzip it and remove the original file if config.get("unzip", False): with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(destination) + zipfile.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") # except: # logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -804,7 +805,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): gpdf_list = [None] * len(files_to_merge) logger.info("Merging hydrobasins files into: " + output_fl) for i, f_name in tqdm(enumerate(files_to_merge)): - gpdf_list[i] = gpd.read_file(os.path.join(basins_path, f_name)) + gpdf_list[i] = gpd.read_file(get_path(basins_path, f_name)) fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates( subset="HYBAS_ID", ignore_index=True ) @@ -813,16 +814,14 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): - os.chdir(os.path.dirname(os.path.abspath(__file__))) - from _helpers import mock_snakemake - + change_to_script_dir(__file__) snakemake = mock_snakemake("retrieve_databundle_light") # TODO Make logging compatible with progressbar (see PR #102, PyPSA-Eur) configure_logging(snakemake) sets_path_to_root("pypsa-earth") - rootpath = os.getcwd() + root_path = get_current_directory_path() tutorial = snakemake.params.tutorial countries = snakemake.params.countries logger.info(f"Retrieving data for {len(countries)} countries.") @@ -866,7 +865,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): try: download_and_unzip = globals()[f"download_and_unzip_{host}"] if download_and_unzip( - config_bundles[b_name], rootpath, disable_progress=disable_progress + config_bundles[b_name], root_path, disable_progress=disable_progress ): downloaded_bundle = True except Exception: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 30d60e32f..eae4ed1f1 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -84,7 +84,6 @@ 4. Optionally, if an integer were provided for the wildcard ``{simpl}`` (e.g. ``networks/elec_s500.nc``), the network is clustered to this number of clusters with the routines from the ``cluster_network`` rule with the function ``cluster_network.cluster(...)``. This step is usually skipped! 
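One reason the rootpath -> root_path rename in retrieve_databundle_light.py has to touch every download_and_unzip_* variant at once: the __main__ block above resolves the handler dynamically via globals()[f"download_and_unzip_{host}"] and calls them all the same way. A schematic, self-contained illustration of that contract (the host value and config contents are made up):

    def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress=False):
        # placeholder body; the real handlers live in retrieve_databundle_light.py
        return True


    def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=False):
        return True


    config_bundle = {"category": "example", "urls": {"zenodo": "https://example.com/x.zip"}}
    host = "zenodo"  # picked from the bundle's "urls" section in the real workflow
    handler = globals()[f"download_and_unzip_{host}"]
    downloaded = handler(config_bundle, ".", disable_progress=True)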
""" -import os import sys from functools import reduce @@ -94,9 +93,11 @@ import pypsa import scipy as sp from _helpers import ( + change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, + mock_snakemake, update_p_nom_max, ) from add_electricity import load_costs @@ -961,9 +962,7 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") configure_logging(snakemake) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index f83b47478..f057b8823 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -77,14 +77,18 @@ for all ``scenario`` s in the configuration file the rule :mod:`solve_network`. """ -import os import re -from pathlib import Path import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -542,9 +546,7 @@ def solve_network(n, config, opts="", **kwargs): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "solve_network", simpl="", @@ -556,7 +558,7 @@ def solve_network(n, config, opts="", **kwargs): tmpdir = snakemake.params.solving.get("tmpdir") if tmpdir is not None: - Path(tmpdir).mkdir(parents=True, exist_ok=True) + build_directory(tmpdir, just_parent_directory=False) opts = snakemake.wildcards.opts.split("-") solve_opts = snakemake.params.solving["options"] diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..fa7a7644d --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..fa8cbd171 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import shutil + +import pytest + +_content_temp_file = "content" +_name_temp_file = "hello.txt" +_temp_content_dir = "temp_content_dir" +_sub_temp_content_dir = "sub_temp_content_dir" + + +@pytest.fixture(scope="function") +def get_temp_file(tmpdir): + p = tmpdir.join(_name_temp_file) + p.write(_content_temp_file) + yield p + pathlib.Path(p).unlink(missing_ok=True) + + +@pytest.fixture(scope="function") +def get_temp_folder(tmpdir): + temp_content_dir = tmpdir.join(_temp_content_dir) + sub_temp_content_dir = temp_content_dir.join(_sub_temp_content_dir) + yield sub_temp_content_dir + shutil.rmtree(str(sub_temp_content_dir)) diff --git a/test/test_helpers.py b/test/test_helpers.py new file mode 100644 index 000000000..4d65adea2 --- /dev/null +++ b/test/test_helpers.py @@ -0,0 +1,469 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import os 
+import pathlib +import shutil +import sys +from test.conftest import ( + _content_temp_file, + _name_temp_file, + _sub_temp_content_dir, + _temp_content_dir, + get_temp_file, +) + +import numpy as np +import pandas as pd + +sys.path.append("./scripts") + +from _helpers import ( + aggregate_fuels, + build_directory, + change_to_script_dir, + country_name_2_two_digits, + get_conv_factors, + get_current_directory_path, + get_path, + get_path_size, + get_relative_path, + modify_commodity, + safe_divide, + three_2_two_digits_country, + two_2_three_digits_country, + two_digits_2_name_country, +) + +path_cwd = str(pathlib.Path.cwd()) + + +original_commodity_data = [ + "Biogases", + "Fuelwood", + "of which: fishing", + "Natural gas liquids", + "Naphtha", + "Motor Gasoline", + "Motor gasoline", + "Gasoline-type jet fuel", + "Peat products", + "Peat Products", + "Direct use of geothermal heat", + "Additives and Oxygenates", + "Electricity", + "Animal waste", + "animal waste", + "Refinery gas", + "Refinery Gas", + "Fuel oil", + "Oil shale", + "Oil Shale", + "Lignite", + "Falling water", + "Petroleum coke", + "Petroleum Coke", + "Aviation gasoline", + "Ethane", + "Natural gas (including LNG)", + "Natural gas", + "Natural Gas (including LNG)", + "Other bituminous coal", + "Paraffin waxes", + "Hard coal", + "Coal", + "Hrad coal", + "Coke Oven Gas", + "Gasworks Gas", + "Brown coal briquettes", + "Brown Coal Briquettes", + "Liquefied petroleum gas (LPG)", + "Liquified Petroleum Gas (LPG)", + "Sub-bituminous coal", + "Kerosene-type Jet Fuel", + "Charcoal", + "Heat", + "Gas coke", + "Gas Coke", + "Patent fuel", + "Peat (for fuel use)", + "Peat", + "Coal Tar", + "Biogasoline", + "Coking coal", + "Electricity generating capacity", + "Anthracite", + "Coke oven coke", + "Coke-oven coke", + "Coke Oven Coke", + "Conventional crude oil", + "Crude petroleum", + "Brown coal", + "Lignite brown coal", + "Lignite brown coal- recoverable resources", + "Biodiesel", + "Lubricants", + "Black Liquor", + "Gas Oil/ Diesel Oil", + "Gas Oil/ Diesel Oil ", + "Gas Oil/Diesel Oil", + "Bagasse", + "Direct use of solar thermal heat", + "Bio jet kerosene", + "Blast Furnace Gas", + "Blast furnace gas", + "Bitumen", +] + +modified_commodity_data = [ + "biogases", + "fuelwood", + "of which: fishing", + "natural gas liquids", + "naphtha", + "motor gasoline", + "gasoline-type jet fuel", + "peat products", + "direct use of geothermal heat", + "additives and oxygenates", + "electricity", + "animal waste", + "refinery gas", + "fuel oil", + "oil shale", + "lignite", + "falling water", + "petroleum coke", + "aviation gasoline", + "ethane", + "natural gas (including lng)", + "natural gas", + "other bituminous coal", + "paraffin waxes", + "hard coal", + "coal", + "coke-oven gas", + "gasworks gas", + "brown coal briquettes", + "liquefied petroleum gas (lpg)", + "sub-bituminous coal", + "kerosene-type jet fuel", + "charcoal", + "heat", + "gas coke", + "patent fuel", + "peat (for fuel use)", + "peat", + "coal tar", + "biogasoline", + "coking coal", + "electricity generating capacity", + "anthracite", + "coke-oven coke", + "conventional crude oil", + "crude petroleum", + "brown coal", + "lignite brown coal", + "lignite brown coal - recoverable resources", + "biodiesel", + "lubricants", + "black liquor", + "gas oil/ diesel oil", + "bagasse", + "direct use of solar thermal heat", + "bio jet kerosene", + "blast furnace gas", + "bitumen", +] + +original_commodity_dataframe = pd.DataFrame( + original_commodity_data, columns=["Commodity"] +) 
+modified_commodity_dataframe = pd.DataFrame( + modified_commodity_data, columns=["Commodity"] +) + + +def test_build_directory(get_temp_folder, tmpdir): + """ + Verify the directory tree returned by build_directory() + + Please note: + -) build_directory(path, just_parent_directory=True) is equivalent to os.makedirs(os.path.dirname(path)). + Given a path tmpdir/temp_content_dir/sub_temp_content_dir, it will create just tmpdir/temp_content_dir/ + -) build_directory(path, just_parent_directory=False) is equivalent to os.makedirs(path). Given a path + tmpdir/temp_content_dir/sub_temp_content_dir, it will create tmpdir/temp_content_dir/sub_temp_content_dir + """ + + # test with pathlib + build_directory(get_temp_folder, just_parent_directory=True) + just_parent_list_pathlib = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list_pathlib.append(str(get_path(root))) + + assert len(just_parent_list_pathlib) == 2 + assert just_parent_list_pathlib[0] == str(tmpdir) + assert just_parent_list_pathlib[1] == str(tmpdir.join(_temp_content_dir)) + + # remove the temporary folder tmpdir/temp_content_dir/ + shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir)) + + # test with os.makedirs. Please note for exist_ok=False, + # a FileExistsError is raised if the target directory + # already exists. Hence, setting exist_ok=False ensures + # that the removal with shutil.rmtree was successful + os.makedirs(os.path.dirname(get_temp_folder), exist_ok=False) + just_parent_list_os = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list_os.append(str(get_path(root))) + + assert just_parent_list_pathlib == just_parent_list_os + + # test with pathlib + build_directory(get_temp_folder, just_parent_directory=False) + full_tree_list_pathlib = [] + for root, dirs, files in os.walk(tmpdir): + full_tree_list_pathlib.append(str(get_path(root))) + + assert len(full_tree_list_pathlib) == 3 + assert full_tree_list_pathlib[0] == str(tmpdir) + assert full_tree_list_pathlib[1] == str(tmpdir.join(_temp_content_dir)) + assert full_tree_list_pathlib[2] == str( + tmpdir.join(_temp_content_dir, _sub_temp_content_dir) + ) + + # remove the temporary folder tmpdir/temp_content_dir/* + shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir)) + + # test with os.makedirs. Please note for exist_ok=False, + # a FileExistsError is raised if the target directory + # already exists. 
Hence, setting exist_ok=False ensures + # that the removal with shutil.rmtree was successful + os.makedirs(get_temp_folder, exist_ok=False) + full_tree_list_os = [] + for root, dirs, files in os.walk(tmpdir): + full_tree_list_os.append(str(get_path(root))) + + assert full_tree_list_os == full_tree_list_pathlib + + +def test_change_to_script_dir(): + """ + Verify the path returned by change_to_script_dir() + """ + change_to_script_dir(__file__) + assert str(pathlib.Path.cwd()) == path_cwd + os.sep + "test" + change_to_script_dir(".") + assert str(pathlib.Path.cwd()) == path_cwd + + +def test_get_path(): + """ + Verify the path returned by get_path() + """ + file_name_path_one = get_path( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + path_name_path_two = get_path( + pathlib.Path(__file__).parent, "..", "logs", "rule.log" + ) + assert str(file_name_path_one) == os.path.join( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + assert str(path_name_path_two) == str( + pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") + ) + + +def test_get_path_size(get_temp_file): + """ + Verify the path size (in bytes) returned by get_path_size() + """ + path = get_temp_file + file_size = get_path_size(path) + assert file_size == os.stat(path).st_size + assert file_size == len(_content_temp_file) + + +def test_get_current_directory_path(): + """ + Verify the current directory path returned by get_current_directory_path() + """ + path = get_current_directory_path() + assert str(path) == os.getcwd() + + +def test_get_relative_path(get_temp_file): + """ + Verify the relative path returned by get_relative_path() + """ + path = get_temp_file + # path relative to the parent directory of the temp file + relative_path = get_relative_path(path, get_path(path).parent) + assert str(relative_path) == _name_temp_file + assert str(relative_path) == os.path.relpath(path, start=get_path(path).parent) + + +def test_two_2_three_digits_country(): + """ + Verify the conversion from two-digit to three-digit country code. + """ + # Afghanistan + assert two_2_three_digits_country("AF") == "AFG" + # American Samoa + assert two_2_three_digits_country("AS") == "ASM" + # Aruba + assert two_2_three_digits_country("AW") == "ABW" + # Germany + assert two_2_three_digits_country("DE") == "DEU" + # Micronesia (Federated States of) + assert two_2_three_digits_country("FM") == "FSM" + + +def test_three_2_two_digits_country(): + """ + Verify the conversion from three-digit to two-digit country code. + """ + # Afghanistan + assert "AF" == three_2_two_digits_country("AFG") + # American Samoa + assert "AS" == three_2_two_digits_country("ASM") + # Aruba + assert "AW" == three_2_two_digits_country("ABW") + # Germany + assert "DE" == three_2_two_digits_country("DEU") + # Micronesia (Federated States of) + assert "FM" == three_2_two_digits_country("FSM") + + +def test_two_digits_2_name_country(): + """ + Verify the conversion from two-digit country code to country name. + """ + # Micronesia (Federated States of) + assert "Micronesia, Fed. Sts." 
== two_digits_2_name_country("FM") + assert "Federated States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official" + ) + assert "States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official", remove_start_words=["Federated "] + ) + # Democratic Republic of the Congo + assert "DR Congo" == two_digits_2_name_country("CD") + assert "Democratic Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official" + ) + assert "Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official", remove_start_words=["Democratic "] + ) + + +def test_country_name_2_two_digits(): + """ + Verify the conversion from country name to two-digit country code. + """ + # Afghanistan + assert "AF" == country_name_2_two_digits("Afghanistan") + # American Samoa + assert "AS" == country_name_2_two_digits("American Samoa") + # Aruba + assert "AW" == country_name_2_two_digits("Aruba") + # Germany + assert "DE" == country_name_2_two_digits("Germany") + # Micronesia (Federated States of) + assert "FM" == country_name_2_two_digits("Micronesia") + + +def test_safe_divide(): + """ + Verify that the method safe_divide prevents divisions by vanishing + denominator. + """ + assert safe_divide(3.0, 2.0) == 1.5 + assert np.isnan(safe_divide(3.0, 0.0)) + + +def test_get_conv_factors(): + """ + Verify that the conversion factors returned by get_conv_factors are + correct. + """ + conversion_factors_dict = get_conv_factors("industry") + assert conversion_factors_dict["additives and oxygenates"] == 0.008333 + assert conversion_factors_dict["anthracite"] == 0.005 + assert conversion_factors_dict["aviation gasoline"] == 0.01230 + assert conversion_factors_dict["bagasse"] == 0.002144 + assert conversion_factors_dict["biodiesel"] == 0.01022 + assert conversion_factors_dict["biogasoline"] == 0.007444 + assert conversion_factors_dict["bio jet kerosene"] == 0.011111 + assert conversion_factors_dict["bitumen"] == 0.01117 + assert conversion_factors_dict["brown coal"] == 0.003889 + assert conversion_factors_dict["brown coal briquettes"] == 0.00575 + assert conversion_factors_dict["charcoal"] == 0.00819 + assert conversion_factors_dict["coal tar"] == 0.007778 + assert conversion_factors_dict["coke-oven coke"] == 0.0078334 + assert conversion_factors_dict["coke-oven gas"] == 0.000277 + assert conversion_factors_dict["coking coal"] == 0.007833 + assert conversion_factors_dict["conventional crude oil"] == 0.01175 + assert conversion_factors_dict["crude petroleum"] == 0.011750 + assert conversion_factors_dict["ethane"] == 0.01289 + assert conversion_factors_dict["fuel oil"] == 0.01122 + assert conversion_factors_dict["fuelwood"] == 0.00254 + assert conversion_factors_dict["gas coke"] == 0.007326 + assert conversion_factors_dict["gas oil/ diesel oil"] == 0.01194 + assert conversion_factors_dict["gasoline-type jet fuel"] == 0.01230 + assert conversion_factors_dict["hard coal"] == 0.007167 + assert conversion_factors_dict["kerosene-type jet fuel"] == 0.01225 + assert conversion_factors_dict["lignite"] == 0.003889 + assert conversion_factors_dict["liquefied petroleum gas (lpg)"] == 0.01313 + assert conversion_factors_dict["lubricants"] == 0.011166 + assert conversion_factors_dict["motor gasoline"] == 0.01230 + assert conversion_factors_dict["naphtha"] == 0.01236 + assert conversion_factors_dict["natural gas liquids"] == 0.01228 + assert conversion_factors_dict["oil shale"] == 0.00247 + assert conversion_factors_dict["other bituminous coal"] == 
0.005556 +    assert conversion_factors_dict["paraffin waxes"] == 0.01117 +    assert conversion_factors_dict["patent fuel"] == 0.00575 +    assert conversion_factors_dict["peat"] == 0.00271 +    assert conversion_factors_dict["peat products"] == 0.00271 +    assert conversion_factors_dict["petroleum coke"] == 0.009028 +    assert conversion_factors_dict["refinery gas"] == 0.01375 +    assert conversion_factors_dict["sub-bituminous coal"] == 0.005555 +    assert np.isnan(get_conv_factors("non-industry")) + + +def test_modify_commodity(): +    """ +    Verify that modify_commodity returns the commodities in the desired format. +    """ +    new_commodity_dataframe = pd.DataFrame() +    new_commodity_dataframe["Commodity"] = ( +        original_commodity_dataframe["Commodity"].map(modify_commodity).unique() +    ) +    df = new_commodity_dataframe.compare(modified_commodity_dataframe) +    # DataFrame.compare() returns an empty dataframe +    # when the two frames are identical +    assert df.empty +  + +def test_aggregate_fuels(): +    """ +    Verify what is returned by aggregate_fuels. +    """ +    assert np.isnan(aggregate_fuels("non-industry")) diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py new file mode 100644 index 000000000..914089614 --- /dev/null +++ b/test/test_prepare_network.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import sys + +sys.path.append("./scripts") + +from prepare_network import download_emission_data + + +def test_download_emission_data(): +    filename = download_emission_data() +    assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls"
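test_download_emission_data performs a live download of the EDGAR CO2 archive and therefore needs network access. Should offline runs ever matter, a standard pytest guard could look like the sketch below; the SKIP_NETWORK_TESTS variable is purely illustrative and not part of this change set.

    import os

    import pytest

    from prepare_network import download_emission_data


    @pytest.mark.skipif(
        os.environ.get("SKIP_NETWORK_TESTS") == "1",
        reason="requires internet access to reach the EDGAR server",
    )
    def test_download_emission_data():
        assert download_emission_data() == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls"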