Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revision locate bus #1195

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 59 additions & 94 deletions scripts/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,118 +1214,83 @@ def download_GADM(country_code, update=False, out_logging=False):
return GADM_inputfile_gpkg, GADM_filename


def get_GADM_layer(country_list, layer_id, update=False, outlogging=False):
"""
Function to retrieve a specific layer id of a geopackage for a selection of
countries.

Parameters
----------
country_list : str
List of the countries
layer_id : int
Layer to consider in the format GID_{layer_id}.
When the requested layer_id is greater than the last available layer, then the last layer is selected.
When a negative value is requested, then, the last layer is requested
"""
# initialization of the list of geodataframes
geodf_list = []

for country_code in country_list:
# download file gpkg
file_gpkg, name_file = download_GADM(country_code, update, outlogging)

# get layers of a geopackage
list_layers = fiona.listlayers(file_gpkg)

# get layer name
if layer_id < 0 | layer_id >= len(list_layers):
# when layer id is negative or larger than the number of layers, select the last layer
layer_id = len(list_layers) - 1
code_layer = np.mod(layer_id, len(list_layers))
layer_name = (
f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}"
)

# read gpkg file
geodf_temp = gpd.read_file(file_gpkg, layer=layer_name)

# convert country name representation of the main country (GID_0 column)
geodf_temp["GID_0"] = [
three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"]
]

# create a subindex column that is useful
# in the GADM processing of sub-national zones
geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"]
def _get_shape_col_gdf(path_to_gadm, co, gadm_level, gadm_clustering):
from build_shapes import get_GADM_layer
ekatef marked this conversation as resolved.
Show resolved Hide resolved

# concatenate geodataframes
geodf_list = pd.concat([geodf_list, geodf_temp])

geodf_GADM = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True))
geodf_GADM.set_crs(geodf_list[0].crs, inplace=True)
col = "name"
if not gadm_clustering:
gdf_shapes = gpd.read_file(path_to_gadm)
else:
if path_to_gadm:
gdf_shapes = gpd.read_file(path_to_gadm)
if "GADM_ID" in gdf_shapes.columns:
col = "GADM_ID"
Comment on lines +1220 to +1227
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure it's really easy this part with four nested if-conditions. Agree that it's not the point to be addressed now, as the fix is quite urgent, but could we probably add a TODO for that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completely agree; this is to maintain the features of the previous code.
To revise this I believe there is the need to revise this together with the revision of the alternative clustering procedure.
Good to add a TODO


return geodf_GADM
if gdf_shapes[col][0][
:3
].isalpha(): # TODO clean later by changing all codes to 2 letters
gdf_shapes[col] = gdf_shapes[col].apply(
lambda name: three_2_two_digits_country(name[:3]) + name[3:]
)
else:
gdf_shapes = get_GADM_layer(co, gadm_level)
col = "GID_{}".format(gadm_level)
gdf_shapes = gdf_shapes[gdf_shapes[col].str.contains(co)]
return gdf_shapes, col


def locate_bus(
coords,
co,
df,
countries,
gadm_level,
path_to_gadm=None,
gadm_clustering=False,
col="name",
dropnull=True,
col_out=None,
):
"""
Function to locate the right node for a coordinate set input coords of
point.
Function to locate the points of the dataframe df into the GADM shapefile.

Parameters
----------
coords: pandas dataseries
dataseries with 2 rows x & y representing the longitude and latitude
co: string (code for country where coords are MA Morocco)
code of the countries where the coordinates are
df: pd.Dataframe
Dataframe with mandatory x, y and country columns
countries: list
List of target countries
gadm_level: int
GADM level to be used
path_to_gadm: str (default None)
Path to the GADM shapefile
gadm_clustering: bool (default False)
True if gadm clustering is adopted
dropnull: bool (default True)
True if the rows with null values should be dropped
col_out: str (default gadm_{gadm_level})
Name of the output column
"""
col = "name"
if not gadm_clustering:
gdf = gpd.read_file(path_to_gadm)
else:
if path_to_gadm:
gdf = gpd.read_file(path_to_gadm)
if "GADM_ID" in gdf.columns:
col = "GADM_ID"
if col_out is None:
col_out = "gadm_{}".format(gadm_level)
df = df[df.country.isin(countries)]
df[col_out] = None
for co in countries:
gdf_shape, col = _get_shape_col_gdf(
Comment on lines +1273 to +1276
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recognise that it may be opening Pandora box, but wouldn't it be possible to replace naming for df and col with something a bit more specific? To my perception, it could probably facilitate a bit reading of the code. What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'll try :)

path_to_gadm, co, gadm_level, gadm_clustering
)
sub_df = df.loc[df.country == co, ["x", "y", "country"]]
gdf = gpd.GeoDataFrame(
sub_df,
geometry=gpd.points_from_xy(sub_df.x, sub_df.y),
crs="EPSG:4326",
)

if gdf[col][0][
:3
].isalpha(): # TODO clean later by changing all codes to 2 letters
gdf[col] = gdf[col].apply(
lambda name: three_2_two_digits_country(name[:3]) + name[3:]
)
else:
gdf = get_GADM_layer(co, gadm_level)
col = "GID_{}".format(gadm_level)
gdf_merged = gpd.sjoin_nearest(gdf, gdf_shape, how="inner", rsuffix="right")

# gdf.set_index("GADM_ID", inplace=True)
gdf_co = gdf[
gdf[col].str.contains(co)
] # geodataframe of entire continent - output of prev function {} are placeholders
# in strings - conditional formatting
# insert any variable into that place using .format - extract string and filter for those containing co (MA)
point = Point(coords["x"], coords["y"]) # point object
df.loc[gdf_merged.index, col_out] = gdf_merged[col]

if gdf_co.empty:
return None
if dropnull:
df = df[df[col_out].notnull()]

try:
return gdf_co[gdf_co.contains(point)][
col
].item() # filter gdf_co which contains point and returns the bus

except ValueError:
return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][
col
].item() # looks for closest one shape=node
return df


def get_conv_factors(sector):
Expand Down
24 changes: 7 additions & 17 deletions scripts/add_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,14 @@ def select_ports(n):
keep_default_na=False,
).squeeze()

# ports = raw_ports[["name", "country", "fraction", "x", "y"]]
# ports.loc[:, "fraction"] = ports.fraction.round(1)

ports = ports[ports.country.isin(countries)]
if len(ports) < 1:
logger.error(
"No export ports chosen, please add ports to the file data/export_ports.csv"
)
gadm_level = snakemake.params.gadm_level

ports["gadm_{}".format(gadm_level)] = ports[["x", "y", "country"]].apply(
lambda port: locate_bus(
port[["x", "y"]],
port["country"],
gadm_level,
snakemake.input["shapes_path"],
snakemake.params.alternative_clustering,
),
axis=1,
ports = locate_bus(
ports,
countries,
gadm_level,
snakemake.input.shapes_path,
snakemake.params.alternative_clustering,
)

# TODO: revise if ports quantity and property by shape become relevant
Expand Down Expand Up @@ -219,6 +208,7 @@ def create_export_profile():
discountrate="0.071",
demand="AB",
h2export="120",
# configfile="test/config.test1.yaml",
)

overrides = override_component_attrs(snakemake.input.overrides)
Expand Down
16 changes: 3 additions & 13 deletions scripts/build_industrial_distribution_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,10 @@ def map_industry_to_buses(df, countries, gadm_level, shapes_path, gadm_clusterin
Only cement not steel - proof of concept.
Change hotmaps to more descriptive name, etc.
"""
Comment on lines 30 to 32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A docstring of map_industry_to_buses( ) seems to be a bit outdated. May it be a good idea to fix it?

df = df[df.country.isin(countries)]
df["gadm_{}".format(gadm_level)] = df[["x", "y", "country"]].apply(
lambda site: locate_bus(
site[["x", "y"]].astype("float"),
site["country"],
gadm_level,
shapes_path,
gadm_clustering,
),
axis=1,
df = locate_bus(df, countries, gadm_level, shapes_path, gadm_clustering).set_index(
"gadm_" + str(gadm_level)
)

return df.set_index("gadm_" + str(gadm_level))
return df


def build_nodal_distribution_key(
Expand Down Expand Up @@ -122,7 +113,6 @@ def match_technology(df):

snakemake = mock_snakemake(
"build_industrial_distribution_key",
simpl="",
clusters="4",
planning_horizons=2050,
demand="AB",
Expand Down
32 changes: 9 additions & 23 deletions scripts/build_powerplants.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
from _helpers import (
configure_logging,
create_logger,
locate_bus,
read_csv_nafix,
to_csv_nafix,
two_digits_2_name_country,
Expand Down Expand Up @@ -371,28 +372,13 @@ def replace_natural_gas_technology(df: pd.DataFrame):
country_list = snakemake.params.countries
geo_crs = snakemake.params.geo_crs

gdf = gpd.read_file(snakemake.input.gadm_shapes)

def locate_bus(coords, co):
gdf_co = gdf[gdf["GADM_ID"].str.contains(co)]

point = Point(coords["lon"], coords["lat"])

try:
return gdf_co[gdf_co.contains(point)][
"GADM_ID"
].item() # filter gdf_co which contains point and returns the bus

except ValueError:
return gdf_co[
gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))
][
"GADM_ID"
].item() # looks for closest one shape=node
# fixing https://github.com/pypsa-meets-earth/pypsa-earth/pull/670

ppl["region_id"] = ppl[["lon", "lat", "Country"]].apply(
lambda pp: locate_bus(pp[["lon", "lat"]], pp["Country"]), axis=1
)
ppl = locate_bus(
ppl.rename(columns={"lon": "x", "lat": "y", "Country": "country"}),
country_list,
gadm_layer_id,
snakemake.input.gadm_shapes,
snakemake.params.alternative_clustering,
col_out="region_id",
).rename(columns={"x": "lon", "y": "lat", "country": "Country"})

ppl.to_csv(snakemake.output.powerplants)
23 changes: 7 additions & 16 deletions scripts/cluster_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
configure_logging,
create_logger,
get_aggregation_strategies,
locate_bus,
update_p_nom_max,
)
from add_electricity import load_costs
Expand Down Expand Up @@ -379,23 +380,13 @@ def n_bounds(model, *n_id):


def busmap_for_gadm_clusters(inputs, n, gadm_level, geo_crs, country_list):
gdf = gpd.read_file(inputs.gadm_shapes)

def locate_bus(coords, co):
gdf_co = gdf[gdf["GADM_ID"].str.contains(co)]
point = Point(coords["x"], coords["y"])

try:
return gdf_co[gdf_co.contains(point)]["GADM_ID"].item()

except ValueError:
return gdf_co[
gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))
]["GADM_ID"].item()

buses = n.buses
buses["gadm_{}".format(gadm_level)] = buses[["x", "y", "country"]].apply(
lambda bus: locate_bus(bus[["x", "y"]], bus["country"]), axis=1
buses = locate_bus(
n.buses,
country_list,
gadm_level,
inputs.gadm_shapes,
gadm_clustering=True,
)

buses["gadm_subnetwork"] = (
Expand Down
41 changes: 14 additions & 27 deletions scripts/prepare_sector_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,21 +1021,13 @@ def add_aviation(n, cost):

gadm_level = options["gadm_level"]

airports["gadm_{}".format(gadm_level)] = airports[["x", "y", "country"]].apply(
lambda airport: locate_bus(
airport[["x", "y"]],
airport["country"],
gadm_level,
snakemake.input.shapes_path,
snakemake.config["cluster_options"]["alternative_clustering"],
),
axis=1,
)
# To change 3 country code to 2
# airports["gadm_{}".format(gadm_level)] = airports["gadm_{}".format(gadm_level)].apply(
# lambda cocode: three_2_two_digits_country(cocode[:3]) + " " + cocode[4:-2])

airports = airports.set_index("gadm_{}".format(gadm_level))
airports = locate_bus(
airports,
countries,
gadm_level,
snakemake.input.shapes_path,
snakemake.config["cluster_options"]["alternative_clustering"],
).set_index("gadm_{}".format(gadm_level))

ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"])

Expand Down Expand Up @@ -1304,18 +1296,13 @@ def add_shipping(n, costs):
options["shipping_hydrogen_share"], demand_sc + "_" + str(investment_year)
)

ports["gadm_{}".format(gadm_level)] = ports[["x", "y", "country"]].apply(
lambda port: locate_bus(
port[["x", "y"]],
port["country"],
gadm_level,
snakemake.input["shapes_path"],
snakemake.config["cluster_options"]["alternative_clustering"],
),
axis=1,
)

ports = ports.set_index("gadm_{}".format(gadm_level))
ports = locate_bus(
ports,
countries,
gadm_level,
snakemake.input.shapes_path,
snakemake.config["cluster_options"]["alternative_clustering"],
).set_index("gadm_{}".format(gadm_level))

ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"])
ind = ind.set_index(n.buses.index[n.buses.carrier == "AC"])
Expand Down
Loading