Include all regions, whether affected or not.
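The scripts now spatially join every target/edge to its admin region (rather than only those that were ever exposed), merge the per-asset results with how="left" so regions without any impact still appear in the aggregate, and fall back to writing the bare regions table when there is no disruption or exposure data at all. A minimal pandas sketch of why the join direction matters; the GID values and numbers below are invented for illustration, not taken from the repository:

import pandas as pd

# three admin regions, only two of which ever see any disruption
regions = pd.DataFrame({"GID_1": ["JAM.1_1", "JAM.2_1", "JAM.3_1"]})
disruption = pd.DataFrame({"GID_1": ["JAM.1_1", "JAM.1_1", "JAM.3_1"], "pop_affected": [120, 40, 75]})

# inner merge: the unaffected region JAM.2_1 disappears from the aggregate
inner = regions.merge(disruption, on="GID_1").groupby("GID_1").sum()

# left merge from the region table: JAM.2_1 is retained; its NaN sums to 0
left = regions.merge(disruption, on="GID_1", how="left").groupby("GID_1").sum()

print(inner.index.tolist())  # ['JAM.1_1', 'JAM.3_1']
print(left.index.tolist())   # ['JAM.1_1', 'JAM.2_1', 'JAM.3_1']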
thomas-fred committed Dec 11, 2023
1 parent fdfbdc7 commit 554b1c2
Showing 2 changed files with 25 additions and 12 deletions.
workflow/scripts/exposure/grid_disruption_by_admin_region.py: 19 additions & 7 deletions
@@ -36,6 +36,10 @@
 
 # event rows, threshold value columns, values are population affected summed across all targets
 disruption_by_event: pd.DataFrame = pd.read_parquet(snakemake.input.disruption_by_event)
+if len(disruption_by_event.index) == 0:
+    logging.info("No disruption data, write out empty disruption")
+    regions.to_parquet(snakemake.output.expected_annual_disruption)
+    sys.exit(0)
 
 # calculate number of years between first and last storm event, necessary for expected annual disruption
 event_ids: list[str] = list(disruption_by_event.index)
@@ -49,13 +53,22 @@
 # create a lookup between target id and the region to which the target's representative point lies within
 logging.info("Creating target to region mapping")
 # filter out targets that are never exposed, we don't need to do an expensive sjoin on them
-target_rep_points: gpd.GeoDataFrame = targets.loc[disruption_by_target.index, ["population", "geometry"]].copy()
+target_rep_points: gpd.GeoDataFrame = targets.loc[:, ["population", "geometry"]].copy()
 target_rep_points.geometry = target_rep_points.geometry.representative_point()
-target_to_region_mapping: pd.DataFrame = target_rep_points.sjoin(regions, how="left").drop(columns=["geometry", "index_right"])
+target_to_region_mapping: pd.DataFrame = target_rep_points \
+    .sjoin(regions, how="left") \
+    .drop(columns=["geometry", "index_right"])
 # rename index column from "id" to "target" in preparation for merge with `disruption_by_target`
 target_to_region_mapping.index = target_to_region_mapping.index.rename("target")
 
 # merge with regions and sum targets across regions
+# use how="left" to take every region, exposed or not, to give a complete table
 disruption_by_region = \
-    target_to_region_mapping.drop(columns=[f"NAME_{admin_level}"]).merge(disruption_by_target, on="target").groupby(f"GID_{admin_level}").sum()
+    target_to_region_mapping \
+        .drop(columns=[f"NAME_{admin_level}"]) \
+        .merge(disruption_by_target, on="target", how="left") \
+        .groupby(f"GID_{admin_level}") \
+        .sum()
 
 # take the disruption counts and divide by the years passing between first and last storm
 # this division is aligned on the indicies (both set to target ids)
@@ -65,10 +78,9 @@
 
 # merge geometry and name columns back in
 disruption_with_geometry = \
-    disruption_fraction_by_region.merge(regions[[f"NAME_{admin_level}", f"GID_{admin_level}", "geometry"]], on=f"GID_{admin_level}", how="right")
-# merge nominal lengths by region back in, too
-disruption_with_population = disruption_with_geometry.merge(disruption_by_region[["population"]], on=f"GID_{admin_level}")
+    disruption_fraction_by_region \
+        .merge(regions[[f"NAME_{admin_level}", f"GID_{admin_level}", "geometry"]], on=f"GID_{admin_level}")
 
 # write out to disk
 logging.info("Writing out with region geometry")
-gpd.GeoDataFrame(disruption_with_population).to_parquet(snakemake.output.expected_annual_disruption)
+gpd.GeoDataFrame(disruption_with_geometry).to_parquet(snakemake.output.expected_annual_disruption)
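Both scripts rely on the same asset-to-region lookup: reduce each target (or edge) to a representative point and spatially join it onto the admin regions. A self-contained sketch of that step; the box regions and point targets below are made up, and the real inputs come from the snakemake workflow:

import geopandas as gpd
from shapely.geometry import Point, box

# two toy admin regions and two toy electricity targets
regions = gpd.GeoDataFrame({"GID_1": ["A", "B"]}, geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)], crs=4326)
targets = gpd.GeoDataFrame({"population": [100, 250]}, geometry=[Point(0.5, 0.5), Point(1.4, 0.6)], crs=4326)

# reduce each target to a representative point guaranteed to lie within its geometry
target_rep_points = targets.copy()
target_rep_points.geometry = target_rep_points.geometry.representative_point()

# spatial join: which region contains each target's representative point?
target_to_region_mapping = target_rep_points \
    .sjoin(regions, how="left") \
    .drop(columns=["geometry", "index_right"])

print(target_to_region_mapping)  # one row per target, with its population and GID_1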
workflow/scripts/exposure/grid_exposure_by_admin_region.py: 6 additions & 5 deletions
@@ -54,7 +54,7 @@
 exposure_by_edge = dask.dataframe.read_parquet(snakemake.input.exposure_by_edge)
 if len(exposure_by_edge.index) == 0:
     logging.info("No exposure data, write out empty exposure")
-    gpd.GeoDataFrame({"geometry": []}, crs=4326).to_parquet(snakemake.output.expected_annual_exposure)
+    regions.to_parquet(snakemake.output.expected_annual_exposure)
     sys.exit(0)
 
 logging.info("Reading exposure by storm")
@@ -69,8 +69,7 @@
 
 # create a lookup between edge id and the region to which the edge's representative point lies within
 logging.info("Creating edge to region mapping")
-# filter out edges that are never exposed, we don't need to do an expensive sjoin on them
-edge_rep_points: gpd.GeoDataFrame = edges.loc[edges.reset_index().edge.isin(exposure_by_edge.index)].copy()
+edge_rep_points: gpd.GeoDataFrame = edges.copy()
 edge_rep_points.geometry = edge_rep_points.geometry.representative_point()
 
 # cast to dask dataframe prior to merge with `exposure_by_edge`
@@ -81,8 +80,9 @@
 
 logging.info("Aggregating to region level")
 # merge with regions and sum edges across regions
+# use how="left" to take every region, exposed or not, to give a complete table
 exposure_by_region = \
-    edge_to_region_mapping.drop(columns=[f"NAME_{admin_level}"]).merge(exposure_by_edge, on="edge").groupby(f"GID_{admin_level}").sum()
+    edge_to_region_mapping.drop(columns=[f"NAME_{admin_level}"]).merge(exposure_by_edge, on="edge", how="left").groupby(f"GID_{admin_level}").sum()
 
 # collapse the task graph, summing across edges and region
 exposure_by_region: pd.DataFrame = exposure_by_region.compute()
@@ -96,9 +96,10 @@
 
 # merge geometry and name columns back in
 exposure_with_geometry = \
-    exposure_fraction_by_region.merge(regions[[f"NAME_{admin_level}", f"GID_{admin_level}", "geometry"]], on=f"GID_{admin_level}", how="right")
+    exposure_fraction_by_region.merge(regions[[f"NAME_{admin_level}", f"GID_{admin_level}", "geometry"]], on=f"GID_{admin_level}")
 # merge nominal lengths by region back in, too
 exposure_with_length = exposure_with_geometry.merge(exposure_by_region[["nominal_length_m"]], on=f"GID_{admin_level}")
+
 # write out to disk
 logging.info("Writing out with region geometry")
 gpd.GeoDataFrame(exposure_with_length).to_parquet(snakemake.output.expected_annual_exposure)
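In the exposure script the per-edge table is a dask dataframe, so the merge and groupby above only build a task graph; nothing runs until .compute() collapses it into a concrete pandas DataFrame. A toy version of that deferred pattern, with values and partition count invented for illustration:

import dask.dataframe as dd
import pandas as pd

# pretend per-edge exposure already tagged with its admin region
pdf = pd.DataFrame({"GID_1": ["A", "A", "B"], "length_m": [10.0, 5.0, 2.5]})
ddf = dd.from_pandas(pdf, npartitions=2)

# lazy: builds a task graph, no work done yet
exposure_by_region = ddf.groupby("GID_1").sum()

# collapse the task graph into an in-memory pandas DataFrame
result: pd.DataFrame = exposure_by_region.compute()
print(result)  # length_m summed per region: A -> 15.0, B -> 2.5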
