code: new changes based on upstream
finozzifa committed Nov 14, 2024
1 parent 92ab8f6 commit 196d255
Showing 12 changed files with 63 additions and 82 deletions.
18 changes: 11 additions & 7 deletions scripts/_helpers.py
@@ -1408,6 +1408,7 @@ def locate_bus(
gadm_url_prefix,
gadm_input_file_args,
contended_flag,
col_name="name",
path_to_gadm=None,
update=False,
out_logging=False,
@@ -1445,8 +1446,9 @@ def locate_bus(
out_logging = true, enables output logging
gadm_clustering : bool
gadm_cluster = true, to enable clustering
col_name: str
column to use to filter the GeoDataFrame
"""
col = "name"
if not gadm_clustering:
gdf = gpd.read_file(path_to_gadm)
else:
@@ -1473,24 +1475,24 @@
update,
out_logging,
)
col = "GID_{}".format(gadm_level)
col_name = "GID_{}".format(gadm_level)

# gdf.set_index("GADM_ID", inplace=True)
gdf_co = gdf[
gdf[col].str.contains(co)
gdf[col_name].str.contains(co)
] # GeoDataFrame of the entire continent (output of the previous step),
# filtered to shapes whose col_name value contains the country code co (e.g. "MA")
point = Point(coords["x"], coords["y"]) # point object

try:
return gdf_co[gdf_co.contains(point)][
col
col_name
].item() # return the col_name value (the bus region) of the shape containing the point

except ValueError:
return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][
col
col_name
].item() # fall back to the closest shape when no shape contains the point
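
For orientation, the lookup pattern this hunk touches — point-in-polygon with a nearest-shape fallback — can be sketched as below. The function name and inputs are illustrative only, not part of the commit:

import geopandas as gpd
from shapely.geometry import Point

def match_point_to_shape(gdf_co: gpd.GeoDataFrame, point: Point, col_name: str):
    # Preferred path: the shape that actually contains the point.
    containing = gdf_co[gdf_co.contains(point)]
    if len(containing) == 1:
        return containing[col_name].item()
    # Fallback (the except ValueError branch above): nearest shape by distance.
    nearest = min(gdf_co.geometry, key=point.distance)
    return gdf_co[gdf_co.geometry == nearest][col_name].item()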


@@ -1566,7 +1568,9 @@ def aggregate_fuels(sector):
]

oil_fuels = [
"additives and oxygenates" "aviation gasoline" "bitumen",
"additives and oxygenates",
"aviation gasoline",
"bitumen",
"conventional crude oil",
"crude petroleum",
"ethane",
@@ -1580,7 +1584,7 @@
"naphtha",
"natural gas liquids",
"other kerosene",
"paraffin waxes" "patent fuel",
"paraffin waxes",
"petroleum coke",
"refinery gas",
]
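
Both hunks in aggregate_fuels fix the same subtle bug: Python concatenates adjacent string literals at parse time, so a missing comma silently merges two list entries instead of raising an error. A minimal illustration (not from the repo):

# A missing comma fuses two literals into one mangled entry:
broken = ["paraffin waxes" "patent fuel", "petroleum coke"]
assert broken == ["paraffin waxespatent fuel", "petroleum coke"]

# With the comma restored, the list has the intended three entries:
fixed = ["paraffin waxes", "patent fuel", "petroleum coke"]
assert len(fixed) == 3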
6 changes: 2 additions & 4 deletions scripts/build_base_industry_totals.py
@@ -8,8 +8,6 @@
@author: user
"""

import pathlib

import country_converter as coco
import pandas as pd
from _helpers import (
@@ -117,11 +115,11 @@ def create_industry_base_totals(df):
clean_industry_list = list(transaction.clean_name.unique())

unsd_path = get_path(
pathlib.Path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/"
get_path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/"
)

# Get the files from the path provided in the OP
all_files = list(pathlib.Path(unsd_path).glob("*.txt"))
all_files = list(get_path(unsd_path).glob("*.txt"))

# Create a dataframe from all downloaded files
df = pd.concat(
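This commit systematically replaces direct pathlib.Path(...) calls with the project's get_path helper, imported from _helpers. The helper's definition is not part of this diff; judging from call sites such as get_path(unsd_path).glob("*.txt") and get_path(BASE_DIR, "data", ...), it presumably joins its arguments into a pathlib.Path, roughly:

import pathlib

def get_path(*args) -> pathlib.Path:
    # Sketch only; the real implementation lives in scripts/_helpers.py.
    # Joining all fragments into one Path gives callers .glob(), .exists(),
    # .parent, .unlink(), etc. through a single helper.
    return pathlib.Path(*args)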
6 changes: 3 additions & 3 deletions scripts/build_demand_profiles.py
@@ -124,12 +124,12 @@ def get_load_paths_gegis(ssp_parentfolder, config):
for continent in region_load:
sel_ext = ".nc"
for ext in [".nc", ".csv"]:
load_path = get_path(BASE_DIR, load_dir, str(continent) + str(ext))
if pathlib.Path(load_path).exists():
load_path = get_path(BASE_DIR, str(load_dir), str(continent) + str(ext))
if get_path(load_path).exists():
sel_ext = ext
break
file_name = str(continent) + str(sel_ext)
load_path = get_path(load_dir, file_name)
load_path = get_path(str(load_dir), file_name)
load_paths.append(load_path)
file_names.append(file_name)
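
For context, the loop above probes each continent's load file with a .nc extension first, then .csv, keeping the first that exists. A compact standalone equivalent (hypothetical name, sketch only):

import pathlib

def first_existing(load_dir: str, continent: str, exts=(".nc", ".csv")) -> pathlib.Path:
    # Return the first candidate that exists on disk; default to the .nc name.
    for ext in exts:
        candidate = pathlib.Path(load_dir, continent + ext)
        if candidate.exists():
            return candidate
    return pathlib.Path(load_dir, continent + exts[0])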

5 changes: 1 addition & 4 deletions scripts/build_natura_raster.py
@@ -45,7 +45,6 @@
The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`.
"""
import os
import pathlib

import atlite
import geopandas as gpd
@@ -68,9 +67,7 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)):

list_fileshapes = []
for lf in list_paths:
if pathlib.Path(
lf
).is_dir(): # if it is a folder, then list all shapes files contained
if get_path(lf).is_dir(): # if it is a folder, then list all shapes files contained
# loop over all dirs and subdirs
for path, subdirs, files in os.walk(lf):
# loop over all files
16 changes: 7 additions & 9 deletions scripts/build_shapes.py
@@ -6,7 +6,6 @@
# -*- coding: utf-8 -*-

import multiprocessing as mp
import pathlib
import shutil
from itertools import takewhile
from operator import attrgetter
@@ -22,7 +21,6 @@
BASE_DIR,
configure_logging,
create_logger,
get_current_directory_path,
get_gadm_layer,
get_path,
mock_snakemake,
@@ -117,7 +115,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0


def save_to_geojson(df, fn):
pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists
get_path(fn).unlink(missing_ok=True) # remove file if it exists
if not isinstance(df, gpd.GeoDataFrame):
df = gpd.GeoDataFrame(dict(geometry=df))

@@ -139,9 +137,9 @@ def load_eez(countries_codes, geo_crs, eez_gpkg_file="./data/eez/eez_v11.gpkg"):
The dataset shall be downloaded independently by the user (see
guide) or together with pypsa-earth package.
"""
if not pathlib.Path(eez_gpkg_file).exists():
if not get_path(eez_gpkg_file).exists():
raise Exception(
f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(eez_gpkg).parent}"
f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_path(eez_gpkg).parent}"
)

geodf_EEZ = gpd.read_file(eez_gpkg_file, engine="pyogrio").to_crs(geo_crs)
@@ -303,7 +301,7 @@ def download_WorldPop_standard(
BASE_DIR, "data", "WorldPop", WorldPop_filename
) # Input filepath tif

if not pathlib.Path(WorldPop_inputfile).exists() or update is True:
if not get_path(WorldPop_inputfile).exists() or update is True:
if out_logging:
logger.warning(
f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}"
@@ -395,9 +393,9 @@ def convert_gdp(name_file_nc, year=2015, out_logging=False):
) # Input filepath nc

# Check if file exists, otherwise throw exception
if not pathlib.Path(GDP_nc).exists():
if not get_path(GDP_nc).exists():
raise Exception(
f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}"
f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_path(GDP_nc).parent}"
)

# open nc dataset
@@ -439,7 +437,7 @@ def load_gdp(
BASE_DIR, "data", "GDP", name_file_tif
) # Input filepath tif

if update | (not pathlib.Path(GDP_tif).exists()):
if update | (not get_path(GDP_tif).exists()):
if out_logging:
logger.warning(
f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}"
4 changes: 2 additions & 2 deletions scripts/cluster_network.py
@@ -121,7 +121,6 @@
:align: center
"""

import pathlib
from functools import reduce

import geopandas as gpd
@@ -134,6 +133,7 @@
configure_logging,
create_logger,
get_aggregation_strategies,
get_path,
mock_snakemake,
normed,
update_p_nom_max,
@@ -626,7 +626,7 @@ def clustering_for_n_clusters(


def save_to_geojson(s, fn):
pathlib.Path(fn).unlink(missing_ok=True)
get_path(fn).unlink(missing_ok=True)
df = s.reset_index()
schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"}
df.to_file(fn, driver="GeoJSON", schema=schema)
4 changes: 1 addition & 3 deletions scripts/download_osm_data.py
@@ -26,14 +26,12 @@
- ``data/osm/out``: Prepared power data as .geojson and .csv files per country
- ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files
"""
import pathlib
import shutil

from _helpers import (
BASE_DIR,
configure_logging,
create_logger,
get_current_directory_path,
get_path,
mock_snakemake,
read_osm_config,
@@ -135,7 +133,7 @@ def convert_iso_to_geofk(
for name in names:
for f in out_formats:
new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}")
old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}"))
old_files = list(get_path(out_path).glob(f"*{name}.{f}"))
# if file is missing, create empty file, otherwise rename it and move it
if not old_files:
with open(new_file_name, "w") as f:
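
The truncated block above either moves existing per-country outputs under the aggregated name or creates empty placeholders; a self-contained sketch of that pattern (hypothetical names, not the commit's code):

import pathlib
import shutil

def collect_or_touch(out_path: str, store_path: str, name: str, fmt: str) -> None:
    new_file = pathlib.Path(store_path, f"all_raw_{name}s.{fmt}")
    old_files = list(pathlib.Path(out_path).glob(f"*{name}.{fmt}"))
    if not old_files:
        new_file.touch()  # empty placeholder so downstream rules still find a file
    else:
        shutil.move(str(old_files[0]), new_file)  # rename and move the existing output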
19 changes: 9 additions & 10 deletions scripts/make_statistics.py
@@ -24,8 +24,6 @@
This rule creates a dataframe containing in the columns the relevant statistics for the current run.
"""

import pathlib

import geopandas as gpd
import numpy as np
import pandas as pd
@@ -34,6 +32,7 @@
from _helpers import (
create_country_list,
create_logger,
get_path,
get_path_size,
mock_snakemake,
three_2_two_digits_country,
@@ -127,7 +126,7 @@ def collect_basic_osm_stats(path, rulename, header):
"""
Collect basic statistics on OSM data: number of items
"""
if pathlib.Path(path).is_file() and get_path_size(path) > 0:
if get_path(path).is_file() and get_path_size(path) > 0:
df = gpd.read_file(path)
n_elem = len(df)

@@ -146,7 +145,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"):
- length of the stored shapes
- length of objects with tag_frequency == 0 (DC elements)
"""
if pathlib.Path(path).is_file() and get_path_size(path) > 0:
if get_path(path).is_file() and get_path_size(path) > 0:
df = gpd.read_file(path)
n_elem = len(df)
obj_length = (
@@ -248,7 +247,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"):

df = pd.DataFrame()

if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file():
if get_path(fp_onshore).is_file() and get_path(fp_offshore).is_file():
gdf_onshore = gpd.read_file(fp_onshore)
gdf_offshore = gpd.read_file(fp_offshore)

@@ -290,7 +289,7 @@ def capacity_stats(df):
else:
return df.groupby("carrier").p_nom.sum().astype(float)

if pathlib.Path(network_path).is_file():
if get_path(network_path).is_file():
n = pypsa.Network(network_path)

lines_length = float((n.lines.length * n.lines.num_parallel).sum())
@@ -345,7 +344,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"):
"""
snakemake = _mock_snakemake(rulename)

if not pathlib.Path(snakemake.output.africa_shape).is_file():
if not get_path(snakemake.output.africa_shape).is_file():
return pd.DataFrame()

df_continent = gpd.read_file(snakemake.output.africa_shape)
@@ -356,7 +355,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"):
.geometry.area.iloc[0]
)

if not pathlib.Path(snakemake.output.gadm_shapes).is_file():
if not get_path(snakemake.output.gadm_shapes).is_file():
return pd.DataFrame()

df_gadm = gpd.read_file(snakemake.output.gadm_shapes)
@@ -470,7 +469,7 @@ def collect_renewable_stats(rulename, technology):
"""
snakemake = _mock_snakemake(rulename, technology=technology)

if pathlib.Path(snakemake.output.profile).is_file():
if get_path(snakemake.output.profile).is_file():
res = xr.open_dataset(snakemake.output.profile)

if technology == "hydro":
@@ -503,7 +502,7 @@ def add_computational_stats(df, snakemake, column_name=None):
comp_data = [np.nan] * 3 # total_time, mean_load and max_memory

if snakemake.benchmark:
if not pathlib.Path(snakemake.benchmark).is_file():
if not get_path(snakemake.benchmark).is_file():
return df

bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t")
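
Like get_path, the get_path_size helper used throughout this file comes from _helpers and its definition is not shown in the diff; presumably it returns a file size in bytes, along the lines of:

import pathlib

def get_path_size(path) -> int:
    # Sketch only: size in bytes of the file at `path`,
    # used above to skip empty OSM outputs.
    return pathlib.Path(path).stat().st_size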
4 changes: 1 addition & 3 deletions scripts/make_summary.py
@@ -52,8 +52,6 @@
Replacing *summaries* with *plots* creates nice colored maps of the results.
"""

import pathlib

import pandas as pd
import pypsa
from _helpers import (
@@ -503,7 +501,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all"

for label, filename in networks_dict.items():
print(label, filename)
if not pathlib.Path(filename).exists():
if not get_path(filename).exists():
print("does not exist!!")
continue

6 changes: 3 additions & 3 deletions scripts/monte_carlo.py
@@ -112,7 +112,7 @@ def monte_carlo_sampling_pydoe2(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
@@ -142,7 +142,7 @@ def monte_carlo_sampling_chaospy(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
@@ -185,7 +185,7 @@ def monte_carlo_sampling_scipy(
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
"Discrepancy is:", discrepancy, " more details in function documentation."
f"Discrepancy is: {discrepancy} more details in function documentation."
)

return lh
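
All three monte_carlo.py hunks fix the same bug: logger.info is not print(). Extra positional arguments are applied to the message with %-formatting when the record is emitted, and a message with no % placeholders then produces a formatting error instead of logging the value. A minimal before/after sketch:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
discrepancy = 0.0123

# Broken: emits "--- Logging error ---" because the extra arguments
# cannot be %-merged into a message that has no placeholders.
logger.info("Discrepancy is:", discrepancy, " more details in function documentation.")

# Fixed, as in this commit (f-string), or the lazy %-style equivalent:
logger.info(f"Discrepancy is: {discrepancy} more details in function documentation.")
logger.info("Discrepancy is: %s more details in function documentation.", discrepancy)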