diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index a0e0299c4..08a7da822 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -32,10 +32,11 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v3 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -43,7 +44,7 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index 4cb98fa97..bbdf4e957 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -30,14 +30,15 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v3 # - name: Add solver to environment # run: | # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -45,15 +46,15 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} + key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.mac.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - name: Update environment due to outdated or unavailable cache if: steps.cache.outputs.cache-hit != 'true' - run: mamba env update -n pypsa-earth -f envs/environment.yaml + run: mamba env update -n pypsa-earth -f envs/environment.mac.yaml - name: Conda list run: | diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 14d56e903..7697306e3 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -30,14 +30,15 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v3 # - name: Add solver to environment # run: | # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -45,7 +46,7 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} diff --git a/config.default.yaml b/config.default.yaml index 91ae2c53f..80505f563 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -94,6 +94,8 @@ build_shape_options: worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model + gadm_file_prefix: "gadm41_" + gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" clean_osm_data_options: # osm = OpenStreetMap names_by_shapes: true # Set the country name based on the extended country shapes diff --git a/config.tutorial.yaml b/config.tutorial.yaml index e6be7cf5b..09c71c068 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -19,7 +19,7 @@ countries: ["NG", "BJ"] #["NG"] # Nigeria #["NE"] # Niger #["SL"] # Sierra Leone - #["MA"] # Morroco + #["MA"] # Morocco #["ZA"] # South Africa enable: @@ -108,6 +108,8 @@ build_shape_options: worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model + gadm_file_prefix: "gadm41_" + gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" clean_osm_data_options: names_by_shapes: true # Set the country name based on the extended country shapes diff --git a/envs/environment.mac.yaml b/envs/environment.mac.yaml new file mode 100644 index 000000000..608fcb236 --- /dev/null +++ b/envs/environment.mac.yaml @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +name: pypsa-earth +channels: +- conda-forge +- bioconda +- gurobi +dependencies: +- python>=3.8 +- pip +- mamba # esp for windows build + +- pypsa>=0.24, <0.25 +# - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged +- dask +- powerplantmatching>=0.5.7 +- earth-osm>=2.1 +- atlite + + # Dependencies of the workflow itself +- xlrd +- openpyxl +- seaborn +- snakemake-minimal<8 +- memory_profiler +- ruamel.yaml<=0.17.26 +- pytables +- lxml +- numpy +- pandas +- geopandas>=0.11.0, <=0.14.3 +- fiona!=1.8.22 +- xarray>=2023.11.0, <2023.12.0 +- netcdf4 +- networkx +- scipy +- pydoe2 +- shapely!=2.0.4 +- pre-commit +- pyomo +- matplotlib<=3.5.2 +- reverse-geocode +- country_converter +- pyogrio +- numba +- py7zr + + # Keep in conda environment when calling ipython +- ipython + # Jupyter notebook requirement +- ipykernel +- jupyterlab + + # GIS dependencies: +- cartopy +- descartes +- rasterio!=1.2.10 +- rioxarray + + # Plotting +- geoviews +- hvplot +- graphviz +- contextily +- graphviz + + # PyPSA-Eur-Sec Dependencies +- geopy +- tqdm +- pytz +- country_converter + + # Cloud download +# - googledrivedownloader # Commented until https://github.com/ndrplz/google-drive-downloader/pull/28 is merged: PR installed using pip + +# Default solver for tests (required for CI) +- glpk +- ipopt +- gurobi + +- pip: + - git+https://github.com/davide-f/google-drive-downloader@master # google drive with fix for virus scan + - git+https://github.com/FRESNA/vresutils@master # until new pip release > 0.3.1 (strictly) + - tsam>=1.1.0 + - chaospy # lastest version only available on pip diff --git a/scripts/_helpers.py b/scripts/_helpers.py index d07951f13..0140ccb28 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -12,7 +12,6 @@ import subprocess import sys import urllib -import zipfile import country_converter as coco import fiona @@ -911,62 +910,181 @@ def cycling_shift(df, steps=1): return df -def download_gadm(country_code, update=False, out_logging=False): +def get_gadm_filename(country_code, file_prefix="gadm41_"): + """ + Function to get three digits country code for GADM. + """ + special_codes_gadm = { + "XK": "XKO", # kosovo + "CP": "XCL", # clipperton island + "SX": "MAF", # saint-martin + "TF": "ATF", # french southern territories + "AX": "ALA", # aland + "IO": "IOT", # british indian ocean territory + "CC": "CCK", # cocos island + "NF": "NFK", # norfolk + "PN": "PCN", # pitcairn islands + "JE": "JEY", # jersey + "XS": "XSP", # spratly islands + "GG": "GGY", # guernsey + "UM": "UMI", # United States minor outlying islands + "SJ": "SJM", # svalbard + "CX": "CXR", # Christmas island + } + + if country_code in special_codes_gadm: + return file_prefix + special_codes_gadm[country_code] + else: + return file_prefix + two_2_three_digits_country(country_code) + + +def get_gadm_url(gadm_url_prefix, gadm_filename): + """ + Function to get the gadm url given a gadm filename. + """ + return gadm_url_prefix + gadm_filename + ".gpkg" + + +def download_gadm( + country_code, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + update=False, + out_logging=False, +): """ Download gpkg file from GADM for a given country code. Parameters ---------- country_code : str - Two letter country codes of the downloaded files + 2-digit country name of the downloaded files + file_prefix : str + file prefix string + gadm_url_prefix: str + gadm url prefix + gadm_input_file_args: list[str] + gadm input file arguments list update : bool Update = true, forces re-download of files + out_logging : bool + out_logging = true, enables output logging Returns ------- gpkg file per country """ - gadm_filename = f"gadm36_{two_2_three_digits_country(country_code)}" - gadm_url = f"https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/{gadm_filename}_gpkg.zip" _logger = logging.getLogger(__name__) - gadm_input_file_zip = get_path( + + gadm_filename = get_gadm_filename(country_code, file_prefix) + gadm_url = get_gadm_url(gadm_url_prefix, gadm_filename) + gadm_input_file = get_path( get_current_directory_path(), - "data", - "raw", - "gadm", + *gadm_input_file_args, + gadm_filename, gadm_filename, - gadm_filename + ".zip", - ) # Input filepath zip + ) gadm_input_file_gpkg = get_path( - get_current_directory_path(), - "data", - "raw", - "gadm", - gadm_filename, - gadm_filename + ".gpkg", + str(gadm_input_file) + ".gpkg" ) # Input filepath gpkg if not pathlib.Path(gadm_input_file_gpkg).exists() or update is True: if out_logging: _logger.warning( - f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" + f"{gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_gpkg}" ) + # create data/osm directory - build_directory(gadm_input_file_zip) + build_directory(str(gadm_input_file_gpkg)) - with requests.get(gadm_url, stream=True) as r: - with open(gadm_input_file_zip, "wb") as f: + try: + r = requests.get(gadm_url, stream=True, timeout=300) + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + raise Exception( + f"GADM server is down at {gadm_url}. Data needed for building shapes can't be extracted.\n\r" + ) + except Exception as exception: + raise Exception( + f"An error happened when trying to load GADM data by {gadm_url}.\n\r" + + str(exception) + + "\n\r" + ) + else: + with open(gadm_input_file_gpkg, "wb") as f: shutil.copyfileobj(r.raw, f) - with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: - zip_ref.extractall(pathlib.Path(gadm_input_file_zip).parent) - return gadm_input_file_gpkg, gadm_filename -def get_gadm_layer(country_list, layer_id, update=False, outlogging=False): +def get_gadm_layer_name(country_code, file_prefix, layer_id, code_layer): + + if file_prefix == "gadm41_": + return "ADM_ADM_" + str(layer_id) + else: + raise Exception( + f"The requested GADM data version {file_prefix} does not exist." + ) + + +def filter_gadm( + geo_df, + layer, + cc, + contended_flag, + output_nonstd_to_csv=False, +): + # identify non-standard geo_df rows + geo_df_non_std = geo_df[geo_df["GID_0"] != two_2_three_digits_country(cc)].copy() + + if not geo_df_non_std.empty: + logger.info( + f"Contended areas have been found for gadm layer {layer}. They will be treated according to {contended_flag} option" + ) + + # NOTE: in these options GID_0 is not changed because it is modified below + if contended_flag == "drop": + geo_df.drop(geo_df_non_std.index, inplace=True) + elif contended_flag != "set_by_country": + # "set_by_country" option is the default; if this elif applies, the desired option falls back to the default + logger.warning( + f"Value '{contended_flag}' for option contented_flag is not recognized.\n" + + "Fallback to 'set_by_country'" + ) + + # force GID_0 to be the country code for the relevant countries + geo_df["GID_0"] = cc + + # country shape should have a single geometry + if (layer == 0) and (geo_df.shape[0] > 1): + logger.warning( + f"Country shape is composed by multiple shapes that are being merged in agreement to contented_flag option '{contended_flag}'" + ) + # take the first row only to re-define geometry keeping other columns + geo_df = geo_df.iloc[[0]].set_geometry([geo_df.unary_union]) + + # debug output to file + if output_nonstd_to_csv and not geo_df_non_std.empty: + geo_df_non_std.to_csv( + f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False + ) + + return geo_df + + +def get_gadm_layer( + country_list, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update=False, + out_logging=False, +): """ Function to retrieve a specific layer id of a geopackage for a selection of countries. @@ -979,52 +1097,95 @@ def get_gadm_layer(country_list, layer_id, update=False, outlogging=False): Layer to consider in the format GID_{layer_id}. When the requested layer_id is greater than the last available layer, then the last layer is selected. When a negative value is requested, then, the last layer is requested + geo_crs: str + General geographic projection + file_prefix : str + file prefix string + gadm_url_prefix : str + gadm url prefix + gadm_input_file_args: list[str] + gadm input file arguments list + contended_flag : str + contended areas + update : bool + Update = true, forces re-download of files + out_logging : bool + out_logging = true, enables output logging """ - # initialization of the list of geodataframes - geodf_list = [] + # initialization of the list of geo dataframes + geo_df_list = [] for country_code in country_list: # download file gpkg - file_gpkg, name_file = download_gadm(country_code, update, outlogging) + file_gpkg, name_file = download_gadm( + country_code, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + update, + out_logging, + ) # get layers of a geopackage list_layers = fiona.listlayers(file_gpkg) # get layer name - if layer_id < 0 | layer_id >= len(list_layers): + if layer_id < 0 or layer_id >= len(list_layers): # when layer id is negative or larger than the number of layers, select the last layer layer_id = len(list_layers) - 1 code_layer = np.mod(layer_id, len(list_layers)) - layer_name = ( - f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}" + layer_name = get_gadm_layer_name( + country_code, file_prefix, layer_id, code_layer ) # read gpkg file - geodf_temp = gpd.read_file(file_gpkg, layer=layer_name) - - # convert country name representation of the main country (GID_0 column) - geodf_temp["GID_0"] = [ - three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] - ] + geo_df_temp = gpd.read_file( + file_gpkg, layer=layer_name, engine="pyogrio" + ).to_crs(geo_crs) + + country_sub_index = "" + if file_prefix == "gadm41_": + country_sub_index = f"GID_{layer_id}" + geo_df_temp = filter_gadm( + geo_df=geo_df_temp, + layer=layer_id, + cc=country_code, + contended_flag=contended_flag, + output_nonstd_to_csv=False, + ) + elif file_prefix == "gadm36_": + country_sub_index = f"GID_{code_layer}" + geo_df_temp["GID_0"] = [ + three_2_two_digits_country(twoD_c) for twoD_c in geo_df_temp["GID_0"] + ] + else: + raise Exception( + f"The requested GADM data version {file_prefix} does not exist." + ) - # create a subindex column that is useful - # in the GADM processing of sub-national zones - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"] + geo_df_temp["GADM_ID"] = geo_df_temp[country_sub_index] - # concatenate geodataframes - geodf_list = pd.concat([geodf_list, geodf_temp]) + # append geo data frames + geo_df_list.append(geo_df_temp) - geodf_gadm = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) - geodf_gadm.set_crs(geodf_list[0].crs, inplace=True) + geo_df_gadm = gpd.GeoDataFrame(pd.concat(geo_df_list, ignore_index=True)) + geo_df_gadm.set_crs(geo_crs, inplace=True) - return geodf_gadm + return geo_df_gadm def locate_bus( coords, co, gadm_level, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, path_to_gadm=None, + update=False, + out_logging=False, gadm_clustering=False, ): """ @@ -1037,6 +1198,28 @@ def locate_bus( dataseries with 2 rows x & y representing the longitude and latitude co: string (code for country where coords are MA Morocco) code of the countries where the coordinates are + gadm_level : int + Layer to consider in the format GID_{layer_id}. + When the requested layer_id is greater than the last available layer, then the last layer is selected. + When a negative value is requested, then, the last layer is requested + geo_crs : str + General geographic projection + file_prefix : str + file prefix string + gadm_url_prefix: str + gadm url prefix + gadm_input_file_args: list[str] + gadm input file arguments list + contended_flag : str + contended areas + path_to_gadm : str + path to gadm + update : bool + Update = true, forces re-download of files + out_logging : bool + out_logging = true, enables output logging + gadm_clustering : bool + gadm_cluster = true, to enable clustering """ col = "name" if not gadm_clustering: @@ -1054,7 +1237,17 @@ def locate_bus( lambda name: three_2_two_digits_country(name[:3]) + name[3:] ) else: - gdf = get_gadm_layer(co, gadm_level) + gdf = get_gadm_layer( + co, + gadm_level, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) col = "GID_{}".format(gadm_level) # gdf.set_index("GADM_ID", inplace=True) diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index faa1965c0..76fd324b9 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -41,8 +41,7 @@ it returns a csv file called "demand_profiles.csv", that allocates the load to the buses of the network according to GDP and population. """ -import os -import os.path +import pathlib from itertools import product import geopandas as gpd @@ -117,10 +116,8 @@ def get_load_paths_gegis(ssp_parentfolder, config): prediction_year = config.get("load_options")["prediction_year"] ssp = config.get("load_options")["ssp"] - scenario_path = os.path.join(ssp_parentfolder, ssp) - load_paths = [] - load_dir = os.path.join( + load_dir = get_path( ssp_parentfolder, str(ssp), str(prediction_year), @@ -131,12 +128,12 @@ def get_load_paths_gegis(ssp_parentfolder, config): for continent in region_load: sel_ext = ".nc" for ext in [".nc", ".csv"]: - load_path = os.path.join(str(load_dir), str(continent) + str(ext)) - if os.path.exists(load_path): + load_path = get_path(load_dir, str(continent) + str(ext)) + if pathlib.Path(load_path).exists(): sel_ext = ext break file_name = str(continent) + str(sel_ext) - load_path = os.path.join(str(load_dir), file_name) + load_path = get_path(load_dir, file_name) load_paths.append(load_path) file_names.append(file_name) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index f6b4d5874..1057633bb 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -11,7 +11,6 @@ from itertools import takewhile from operator import attrgetter -import fiona import geopandas as gpd import numpy as np import pandas as pd @@ -24,12 +23,12 @@ configure_logging, create_logger, get_current_directory_path, + get_gadm_layer, get_path, mock_snakemake, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, - two_digits_2_name_country, ) from numba import njit from numba.core import types @@ -47,197 +46,6 @@ logger = create_logger(__name__) -def get_GADM_filename(country_code): - """ - Function to get the GADM filename given the country code. - """ - special_codes_GADM = { - "XK": "XKO", # kosovo - "CP": "XCL", # clipperton island - "SX": "MAF", # sint maartin - "TF": "ATF", # french southern territories - "AX": "ALA", # aland - "IO": "IOT", # british indian ocean territory - "CC": "CCK", # cocos island - "NF": "NFK", # norfolk - "PN": "PCN", # pitcairn islands - "JE": "JEY", # jersey - "XS": "XSP", # spratly - "GG": "GGY", # guernsey - "UM": "UMI", # united states minor outlying islands - "SJ": "SJM", # svalbard - "CX": "CXR", # Christmas island - } - - if country_code in special_codes_GADM: - return f"gadm41_{special_codes_GADM[country_code]}" - else: - return f"gadm41_{two_2_three_digits_country(country_code)}" - - -def download_GADM(country_code, update=False, out_logging=False): - """ - Download gpkg file from GADM for a given country code. - - Parameters - ---------- - country_code : str - Two letter country codes of the downloaded files - update : bool - Update = true, forces re-download of files - - Returns - ------- - gpkg file per country - """ - GADM_filename = get_GADM_filename(country_code) - GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg" - - GADM_inputfile_gpkg = get_path( - get_current_directory_path(), - "data", - "gadm", - GADM_filename, - GADM_filename + ".gpkg", - ) # Input filepath gpkg - - if not pathlib.Path(GADM_inputfile_gpkg).exists() or update is True: - if out_logging: - logger.warning( - f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" - ) - # create data/osm directory - build_directory(GADM_inputfile_gpkg) - - try: - r = requests.get(GADM_url, stream=True, timeout=300) - except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): - raise Exception( - f"GADM server is down at {GADM_url}. Data needed for building shapes can't be extracted.\n\r" - ) - except Exception as exception: - raise Exception( - f"An error happened when trying to load GADM data by {GADM_url}.\n\r" - + str(exception) - + "\n\r" - ) - else: - with open(GADM_inputfile_gpkg, "wb") as f: - shutil.copyfileobj(r.raw, f) - - return GADM_inputfile_gpkg, GADM_filename - - -def filter_gadm( - geodf, - layer, - cc, - contended_flag, - output_nonstd_to_csv=False, -): - # identify non standard geodf rows - geodf_non_std = geodf[geodf["GID_0"] != two_2_three_digits_country(cc)].copy() - - if not geodf_non_std.empty: - logger.info( - f"Contended areas have been found for gadm layer {layer}. They will be treated according to {contended_flag} option" - ) - - # NOTE: in these options GID_0 is not changed because it is modified below - if contended_flag == "drop": - geodf.drop(geodf_non_std.index, inplace=True) - elif contended_flag != "set_by_country": - # "set_by_country" option is the default; if this elif applies, the desired option falls back to the default - logger.warning( - f"Value '{contended_flag}' for option contented_flag is not recognized.\n" - + "Fallback to 'set_by_country'" - ) - - # force GID_0 to be the country code for the relevant countries - geodf["GID_0"] = cc - - # country shape should have a single geometry - if (layer == 0) and (geodf.shape[0] > 1): - logger.warning( - f"Country shape is composed by multiple shapes that are being merged in agreement to contented_flag option '{contended_flag}'" - ) - # take the first row only to re-define geometry keeping other columns - geodf = geodf.iloc[[0]].set_geometry([geodf.unary_union]) - - # debug output to file - if output_nonstd_to_csv and not geodf_non_std.empty: - geodf_non_std.to_csv( - f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False - ) - - return geodf - - -def get_GADM_layer( - country_list, - layer_id, - geo_crs, - contended_flag, - update=False, - outlogging=False, -): - """ - Function to retrieve a specific layer id of a geopackage for a selection of - countries. - - Parameters - ---------- - country_list : str - List of the countries - layer_id : int - Layer to consider in the format GID_{layer_id}. - When the requested layer_id is greater than the last available layer, then the last layer is selected. - When a negative value is requested, then, the last layer is requested - """ - # initialization of the geoDataFrame - geodf_list = [] - - for country_code in country_list: - # Set the current layer id (cur_layer_id) to global layer_id - cur_layer_id = layer_id - - # download file gpkg - file_gpkg, name_file = download_GADM(country_code, update, outlogging) - - # get layers of a geopackage - list_layers = fiona.listlayers(file_gpkg) - - # get layer name - if (cur_layer_id < 0) or (cur_layer_id >= len(list_layers)): - # when layer id is negative or larger than the number of layers, select the last layer - cur_layer_id = len(list_layers) - 1 - - # read gpkg file - geodf_temp = gpd.read_file( - file_gpkg, layer="ADM_ADM_" + str(cur_layer_id), engine="pyogrio" - ).to_crs(geo_crs) - - geodf_temp = filter_gadm( - geodf=geodf_temp, - layer=cur_layer_id, - cc=country_code, - contended_flag=contended_flag, - output_nonstd_to_csv=False, - ) - - # create a subindex column that is useful - # in the GADM processing of sub-national zones - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{cur_layer_id}"] - - # append geodataframes - geodf_list.append(geodf_temp) - - geodf_GADM = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) - geodf_GADM.set_crs(geo_crs) - - return geodf_GADM - - def _simplify_polys(polys, minarea=0.01, tolerance=0.01, filterremote=False): "Function to simplify the shape polygons" if isinstance(polys, MultiPolygon): @@ -257,17 +65,30 @@ def _simplify_polys(polys, minarea=0.01, tolerance=0.01, filterremote=False): return polys.simplify(tolerance=tolerance) -def countries(countries, geo_crs, contended_flag, update=False, out_logging=False): +def countries( + countries, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, +): "Create country shapes" if out_logging: logger.info("Stage 1 of 5: Create country shapes") # download data if needed and get the layer id 0, corresponding to the countries - df_countries = get_GADM_layer( + df_countries = get_gadm_layer( countries, 0, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, update, out_logging, @@ -1247,6 +1068,9 @@ def gadm( gdp_method, countries, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, mem_mb, layer_id=2, @@ -1259,7 +1083,17 @@ def gadm( logger.info("Stage 3 of 5: Creation GADM GeoDataFrame") # download data if needed and get the desired layer_id - df_gadm = get_GADM_layer(countries, layer_id, geo_crs, contended_flag, update) + df_gadm = get_gadm_layer( + countries, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) # select and rename columns df_gadm.rename(columns={"GID_0": "country"}, inplace=True) @@ -1296,7 +1130,7 @@ def gadm( name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc", ) - # renaming 3 letter to 2 letter ISO code before saving GADM file + # renaming three-letter to two-letter ISO code before saving GADM file # In the case of a contested territory in the form 'Z00.00_0', save 'AA.00_0' # Include bugfix for the case of 'XXX00_0' where the "." is missing, such as for Ghana df_gadm["GADM_ID"] = df_gadm["country"] + df_gadm["GADM_ID"].str[3:].apply( @@ -1317,7 +1151,6 @@ def gadm( change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") - configure_logging(snakemake) out = snakemake.output @@ -1337,10 +1170,17 @@ def gadm( contended_flag = snakemake.params.build_shape_options["contended_flag"] worldpop_method = snakemake.params.build_shape_options["worldpop_method"] gdp_method = snakemake.params.build_shape_options["gdp_method"] + file_prefix = snakemake.params.build_shape_options["gadm_file_prefix"] + gadm_url_prefix = snakemake.params.build_shape_options["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] country_shapes = countries( countries_list, + layer_id, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, update, out_logging, @@ -1363,6 +1203,9 @@ def gadm( gdp_method, countries_list, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, mem_mb, layer_id, diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 59c34ea3a..7488b748d 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -103,9 +103,11 @@ def download_emission_data(): ) pathlib.Path(file_path).unlink(missing_ok=True) return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" - except: - logger.error(f"Failed download resource from '{url}'.") - return False + except requests.exceptions.RequestException as e: + logger.error( + f"Failed download resource from '{url}' with exception message '{e}'." + ) + raise SystemExit(e) def emission_extractor(filename, emission_year, country_names): @@ -120,7 +122,7 @@ def emission_extractor(filename, emission_year, country_names): emission_year : int Year of CO2 emissions country_names : numpy.ndarray - Two letter country codes of analysed countries. + Two-letter country codes of analysed countries. Returns ------- @@ -128,8 +130,8 @@ def emission_extractor(filename, emission_year, country_names): """ # data reading process - datapath = get_path(get_current_directory_path(), "data", filename) - df = pd.read_excel(datapath, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) + data_path = get_path(get_current_directory_path(), "data", filename) + df = pd.read_excel(data_path, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) df.columns = df.iloc[0] df = df.set_index("Country_code_A3") df = df.loc[ @@ -192,7 +194,7 @@ def set_line_s_max_pu(n, s_max_pu): logger.info(f"N-1 security margin of lines set to {s_max_pu}") -def set_transmission_limit(n, ll_type, factor, costs, Nyears=1): +def set_transmission_limit(n, ll_type, factor, costs): links_dc_b = n.links.carrier == "DC" if not n.links.empty else pd.Series() _lines_s_nom = ( @@ -430,7 +432,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): break ll_type, factor = snakemake.wildcards.ll[0], snakemake.wildcards.ll[1:] - set_transmission_limit(n, ll_type, factor, costs, Nyears) + set_transmission_limit(n, ll_type, factor, costs) set_line_nom_max( n, diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index d425a781d..84b1c89b4 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -162,8 +162,10 @@ def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress= zipObj.extractall(path=destination) pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") - except: - logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") + except Exception as e: + logger.warning( + f"Failed download resource '{resource}' from cloud '{url}' with exception message '{e}'." + ) return False return True @@ -336,9 +338,9 @@ def get_first_day_of_previous_month(date): pathlib.Path(inner_zipname).unlink(missing_ok=True) logger.info(f"{resource} - Successfully unzipped file '{fzip}'") - except: + except Exception as e: logger.warning( - f"Exception while unzipping file '{fzip}' for {resource_iter}: skipped file" + f"Exception while unzipping file '{fzip}' for {resource_iter} with exception message '{e}': skipped file" ) # close and remove outer zip file @@ -351,9 +353,9 @@ def get_first_day_of_previous_month(date): downloaded = True break - except: + except Exception as e: logger.warning( - f"Failed download resource '{resource_iter}' from cloud '{url_iter}'." + f"Failed download resource '{resource_iter}' from cloud '{url_iter}' with exception message '{e}'." ) current_first_day = get_first_day_of_previous_month(current_first_day) @@ -411,8 +413,10 @@ def download_and_unpack( pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") return True - except: - logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") + except Exception as e: + logger.warning( + f"Failed download resource '{resource}' from cloud '{url}' with exception message '{e}'." + ) return False @@ -868,8 +872,10 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): config_bundles[b_name], root_path, disable_progress=disable_progress ): downloaded_bundle = True - except Exception: - logger.warning(f"Error in downloading bundle {b_name} - host {host}") + except Exception as e: + logger.warning( + f"Error in downloading bundle {b_name} - host {host} - with exception message '{e}'" + ) if downloaded_bundle: downloaded_bundles.append(b_name) diff --git a/test/test_helpers.py b/test/test_helpers.py index 4d65adea2..7cee35c8f 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -17,6 +17,7 @@ get_temp_file, ) +import fiona import numpy as np import pandas as pd @@ -27,8 +28,11 @@ build_directory, change_to_script_dir, country_name_2_two_digits, + download_gadm, get_conv_factors, get_current_directory_path, + get_gadm_filename, + get_gadm_url, get_path, get_path_size, get_relative_path, @@ -274,6 +278,17 @@ def test_get_path(): "sub_path_5", "file.nc", ) + file_name_path_one_list_unpacked = get_path( + path_cwd, + *[ + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ], + ) path_name_path_two = get_path( pathlib.Path(__file__).parent, "..", "logs", "rule.log" ) @@ -286,6 +301,15 @@ def test_get_path(): "sub_path_5", "file.nc", ) + assert str(file_name_path_one_list_unpacked) == os.path.join( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) assert str(path_name_path_two) == str( pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") ) @@ -467,3 +491,90 @@ def test_aggregate_fuels(): Verify what is returned by aggregate_fuels. """ assert np.isnan(aggregate_fuels("non-industry")) + + +def test_get_gadm_filename(): + """ + Verify what is returned by get_gadm_filename. + """ + # Kosovo + assert get_gadm_filename("XK") == "gadm41_XKO" + # Clipperton island + assert get_gadm_filename("CP") == "gadm41_XCL" + # Saint-Martin + assert get_gadm_filename("SX") == "gadm41_MAF" + # French Southern Territories + assert get_gadm_filename("TF") == "gadm41_ATF" + # Aland + assert get_gadm_filename("AX") == "gadm41_ALA" + # British Indian Ocean Territory + assert get_gadm_filename("IO") == "gadm41_IOT" + # Cocos Islands + assert get_gadm_filename("CC") == "gadm41_CCK" + # Norfolk + assert get_gadm_filename("NF") == "gadm41_NFK" + # Pitcairn Islands + assert get_gadm_filename("PN") == "gadm41_PCN" + # Jersey + assert get_gadm_filename("JE") == "gadm41_JEY" + # Spratly Islands + assert get_gadm_filename("XS") == "gadm41_XSP" + # Guernsey + assert get_gadm_filename("GG") == "gadm41_GGY" + # United States Minor Outlying Islands + assert get_gadm_filename("UM") == "gadm41_UMI" + # Svalbard islands + assert get_gadm_filename("SJ") == "gadm41_SJM" + # Christmas island + assert get_gadm_filename("CX") == "gadm41_CXR" + # Afghanistan + assert get_gadm_filename("AF") == "gadm41_AFG" + # American Samoa + assert get_gadm_filename("AS") == "gadm41_ASM" + # Aruba + assert get_gadm_filename("AW") == "gadm41_ABW" + # Germany + assert get_gadm_filename("DE") == "gadm41_DEU" + # Micronesia (Federated States of) + assert get_gadm_filename("FM") == "gadm41_FSM" + # Micronesia (Federated States of) with different file_prefix + assert get_gadm_filename("FM", file_prefix="gadm456_") == "gadm456_FSM" + + +def test_get_gadm_url(): + """ + Verify what is returned by get_gadm_url. + """ + gadm_filename = get_gadm_filename("FM") + url_gadm41 = get_gadm_url( + "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/", + gadm_filename, + ) + assert ( + url_gadm41 + == f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{gadm_filename}.gpkg" + ) + + +def test_download_gadm(): + """ + Verify what is returned by download_gadm. + """ + file_prefix_41 = "gadm41_" + gadm_url_prefix_41 = "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" + gadm_input_file_args_41 = ["data", "gadm"] + gadm_input_file_gpkg_41, gadm_filename_41 = download_gadm( + "XK", + file_prefix_41, + gadm_url_prefix_41, + gadm_input_file_args_41, + update=True, + ) + assert gadm_input_file_gpkg_41 == get_path( + path_cwd, "data/gadm/gadm41_XKO/gadm41_XKO.gpkg" + ) + assert gadm_filename_41 == "gadm41_XKO" + list_layers_41 = fiona.listlayers(gadm_input_file_gpkg_41) + assert list_layers_41[0] == "ADM_ADM_0" + assert list_layers_41[1] == "ADM_ADM_1" + assert list_layers_41[2] == "ADM_ADM_2"