Merge pull request #178 from nismod/feature/heterogeneous_density_road_network

Compose road/rail networks from multiple OSM extracts
thomas-fred authored Mar 18, 2024
2 parents 6e3899c + e51684e commit c0874d5
Showing 38 changed files with 218 additions and 109 deletions.
5 changes: 5 additions & 0 deletions config/composite_network/south-east-asia-rail.csv
@@ -0,0 +1,5 @@
infrastructure_dataset,network_filter
thailand-latest,rail
laos-latest,rail
cambodia-latest,rail
myanmar-latest,rail
5 changes: 5 additions & 0 deletions config/composite_network/south-east-asia-road.csv
@@ -0,0 +1,5 @@
infrastructure_dataset,network_filter
thailand-latest,road-secondary
laos-latest,road-primary
cambodia-latest,road-primary
myanmar-latest,road-primary
66 changes: 46 additions & 20 deletions config/config.yaml
@@ -1,24 +1,13 @@
#####################################
### FLOODING / TRANSPORT WORKFLOW ###
#####################################

## Aqueduct Analysis ##
# This should be a named list of files that specify hazard raster files
# to retrieve using wget -i
hazard_datasets:
aqueduct-coast: 'https://raw.githubusercontent.com/mjaquiery/aqueduct/main/tiffs.txt'
#aqueduct-coast: 'config/hazard_resource_locations/aqueduct-coast_wtsub_perc95.txt'
#aqueduct-river: 'config/hazard_resource_locations/aqueduct-river_rcp4p5_MIROC-ESM-CHEM_2030_tifs.txt' # subset of aqueduct_tifs.txt
aqueduct-river: 'config/hazard_resource_locations/aqueduct-river.txt'
jba-event: 'config/hazard_resource_locations/jba-events.txt'

hazard_types:
aqueduct-coast: 'flood'
aqueduct-river: 'flood'
jba-event: 'flood'
##########################
### TRANSPORT WORKFLOW ###
##########################

# OSM datasets in PBF format, principally from: https://download.geofabrik.de/ #
infrastructure_datasets:
# note that there is a JSON index with country ISO-A2 codes here:
# https://download.geofabrik.de/index-v1-nogeom.json
# we could do lookup from ISO code to URL using this file, to avoid registering URLs here
#
# whole planet file
planet-latest: 'https://planet.osm.org/pbf/planet-latest.osm.pbf'
# 'continent' extracts
@@ -30,31 +19,68 @@ infrastructure_datasets:
north-america-latest: 'http://download.geofabrik.de/north-america-latest.osm.pbf'
south-america-latest: 'http://download.geofabrik.de/south-america-latest.osm.pbf'
# country extracts
bangladesh-latest: 'https://download.geofabrik.de/asia/bangladesh-latest.osm.pbf'
cambodia-latest: 'https://download.geofabrik.de/asia/cambodia-latest.osm.pbf'
china-latest: 'https://download.geofabrik.de/asia/china-latest.osm.pbf'
djibouti-latest: 'https://download.geofabrik.de/africa/djibouti-latest.osm.pbf'
egypt-latest: 'http://download.geofabrik.de/africa/egypt-latest.osm.pbf'
great-britain-latest: 'http://download.geofabrik.de/europe/great-britain-latest.osm.pbf'
india-latest: 'https://download.geofabrik.de/asia/india-latest.osm.pbf'
jamaica-latest: 'http://download.geofabrik.de/central-america/jamaica-latest.osm.pbf'
kenya-latest: 'http://download.geofabrik.de/africa/kenya-latest.osm.pbf'
laos-latest: 'https://download.geofabrik.de/asia/laos-latest.osm.pbf'
myanmar-latest: 'https://download.geofabrik.de/asia/myanmar-latest.osm.pbf'
tanzania-latest: 'https://download.geofabrik.de/africa/tanzania-latest.osm.pbf'
thailand-latest: 'https://download.geofabrik.de/asia/thailand-latest.osm.pbf'
wales-latest: 'https://download.geofabrik.de/europe/great-britain/wales-latest.osm.pbf'
vietnam-latest: 'https://download.geofabrik.de/asia/vietnam-latest.osm.pbf'
# small extract for testing purposes
tanzania-mini: 'https://raw.githubusercontent.com/mjaquiery/aqueduct/main/tanzania-mini.osm.pbf'

# these files contain osmium filter expressions for selecting relevant nodes, ways and relations from .osm.pbf files
# the keys in the mapping, i.e. 'road' and 'rail' will be used to create FILTER_SLUG in rules
network_filters:
road: 'config/osm_filters/road-residential.txt'
road-residential: 'config/osm_filters/road-residential.txt'
road-tertiary: 'config/osm_filters/road-tertiary.txt'
road-secondary: 'config/osm_filters/road-secondary.txt'
road-primary: 'config/osm_filters/road-primary.txt'
rail: 'config/osm_filters/rail.txt'

# define a set of partial transport networks to create and interconnect
# file should be a CSV with `infrastructure_dataset` and `network_filter` columns (with header)
# e.g.
# infrastructure_dataset,network_filter
# thailand-latest,road-secondary
# cambodia-latest,road-primary
# laos-latest,road-primary
composite_network:
south-east-asia-road: 'config/composite_network/south-east-asia-road.csv'
south-east-asia-rail: 'config/composite_network/south-east-asia-rail.csv'

# OSM tag data to retain on selected features, typically for usage in network annotation/analysis
# N.B. feature SELECTION is done with the expressions pointed to from network_filters
keep_tags:
road: ['highway', 'surface', 'bridge', 'maxspeed', 'lanes']
rail: ['railway', 'bridge', 'name', 'gauge', 'usage']

# Number of slices to cut dataset into -- must be a square number
slice_count: 4
slice_count: 64


#########################
### FLOODING WORKFLOW ###
#########################

hazard_datasets:
# hazard rasters to retrieve
aqueduct-coast: 'config/hazard_resource_locations/aqueduct-coast_wtsub_perc95.txt'
aqueduct-river: 'config/hazard_resource_locations/aqueduct-river.txt'
jba-event: 'config/hazard_resource_locations/jba-events.txt'

hazard_types:
aqueduct-coast: 'flood'
aqueduct-river: 'flood'
jba-event: 'flood'

direct_damages:
# assets to calculate direct damages for
1 change: 1 addition & 0 deletions config/osm_filters/road-primary.txt
@@ -0,0 +1 @@
w/highway=motorway,motorway_link,trunk,trunk_link,primary,primary_link
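For reference, a sketch of how such an expression file might be applied -- assuming the osmium CLI is available and that `tags-filter` accepts an `--expressions` file, as recent osmium-tool releases do; the workflow's actual rule invocation is not shown in this diff, and the file paths here are illustrative:

import subprocess

# hedged sketch: keep only motorway..primary(_link) ways from a country
# extract, using the expression file added above
subprocess.run(
    [
        "osmium", "tags-filter",
        "--expressions=config/osm_filters/road-primary.txt",
        "--output=thailand-latest_filter-road-primary.osm.pbf",
        "thailand-latest.osm.pbf",
    ],
    check=True,
)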
2 changes: 2 additions & 0 deletions environment.yml
@@ -19,6 +19,8 @@ dependencies:
- cython==0.29.28 # c--python interface
- contextily # basemaps for plots
- datashader # plotting large datasets
# test_grid_exposure_by_admin_region.py fails with 2024.3.0
- dask<2024.3.0 # larger-than-memory computing
- flake8 # linter
- gdal>=3.3 # command-line tools for spatial data
- geopandas==0.14.1 # geospatial dataframes
6 changes: 0 additions & 6 deletions src/open_gira/network.py
@@ -77,10 +77,4 @@ def create_network(
logging.info("Creating network topology")
network = snkit.network.add_topology(network, id_col="id")

network.edges.rename(
columns={"from_id": "from_node_id", "to_id": "to_node_id", "id": "edge_id"},
inplace=True,
)
network.nodes.rename(columns={"id": "node_id"}, inplace=True)

return network
2 changes: 1 addition & 1 deletion tests/config/config.yaml
@@ -32,7 +32,7 @@ infrastructure_datasets:
# these files contain osmium filter expressions for selecting relevant nodes, ways and relations from .osm.pbf files
# the keys in the mapping, i.e. 'road' and 'rail' will be used to create FILTER_SLUG in rules
network_filters:
road: 'config/osm_filters/road-secondary.txt'
road-secondary: 'config/osm_filters/road-secondary.txt'
rail: 'config/osm_filters/rail.txt'

# OSM tag data to retain on selected features, typically for usage in network annotation/analysis
14 binary files not shown.
2 changes: 1 addition & 1 deletion tests/integration/test_filter_osm_data.py
@@ -5,6 +5,6 @@ def test_filter_osm_data():
runner.run_snakemake_test(
"filter_osm_data",
(
"results/input/OSM/djibouti-latest_filter-road.osm.pbf",
"results/input/OSM/djibouti-latest_filter-road-secondary.osm.pbf",
)
)
4 changes: 2 additions & 2 deletions tests/integration/test_join_network.py
@@ -5,7 +5,7 @@ def test_join_network():
runner.run_snakemake_test(
"join_network",
(
"results/djibouti-latest_filter-road/nodes.geoparquet",
"results/djibouti-latest_filter-road/edges.geoparquet",
"results/djibouti-latest_filter-road/nodes.gpq",
"results/djibouti-latest_filter-road/edges.gpq",
)
)
5 changes: 5 additions & 0 deletions workflow/Snakefile
@@ -76,6 +76,11 @@ if not config["best_estimate_windspeed_failure_threshold"] in config["transmissi

# Constrain wildcards to NOT use _ or /
wildcard_constraints:
# the output dir must end in 'results'
# e.g. '/data/open-gira/results', './results', '/my-results', etc.
# this prevents us matching past it into other folders for an incorrect OUTPUT_DIR,
# but is more flexible than reading a value from, for example, config.yaml
OUTPUT_DIR="^.*results",
DATASET="[^_/]+",
SLICE_SLUG="slice-[0-9]+",
FILTER_SLUG="filter-[^_/]+",
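An illustrative check of the new constraint, assuming Snakemake applies a wildcard's regex with full-match semantics against the text the wildcard captures:

import re

output_dir = re.compile(r"^.*results")
assert output_dir.fullmatch("./results")
assert output_dir.fullmatch("/data/open-gira/results")
assert output_dir.fullmatch("/my-results")
# a wildcard value continuing past 'results' is rejected, so OUTPUT_DIR
# cannot swallow deeper folders
assert not output_dir.fullmatch("./results/geoparquet")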
17 changes: 16 additions & 1 deletion workflow/power-tc/grid_exposure_by_admin_region.py
@@ -62,6 +62,21 @@
exposure_by_event = dask.dataframe.read_parquet(snakemake.input.exposure_by_event)

# calculate number of years between first and last storm event, necessary for expected annual exposure

# N.B. dask==2024.3.0 breaks the line assigning to event_ids -- the index will not resolve to values

# pytest -s tests/integration/test_exposure_by_admin_region.py

# the following seems to know about the index values -- look at the repr!
# (Pdb) exposure_by_event.index.compute()
# Empty DataFrame
# Columns: []
# Index: [2007345N18298, ..., 2022255N15324]

# but actually try to access those values ...
# (Pdb) exposure_by_event.index.compute().values
# array([], shape=(10, 0), dtype=float64)

event_ids: list[str] = list(set(exposure_by_event.index))
years: set[int] = set(track_year.loc[event_ids, "year"])
span_years: int = max([1, max(years) - min(years)]) # with a minimum of one
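The fix adopted by this commit is the environment.yml pin above; an alternative sketch, untested against dask 2024.3.0 and assuming the frame fits in memory once computed, would be to materialise before touching the index:

# hedged alternative sketch (not the committed fix): bring the frame into
# pandas first, then read event ids from its index; may be memory-hungry
event_ids: list[str] = exposure_by_event.compute().index.unique().tolist()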
@@ -102,4 +117,4 @@

# write out to disk
logging.info("Writing out with region geometry")
gpd.GeoDataFrame(exposure_with_length).to_parquet(snakemake.output.expected_annual_exposure)
gpd.GeoDataFrame(exposure_with_length).to_parquet(snakemake.output.expected_annual_exposure)
4 changes: 2 additions & 2 deletions workflow/power-tc/network_components.smk
@@ -1,7 +1,7 @@
rule network_components:
input:
nodes="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/nodes.geoparquet",
edges="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/edges.geoparquet",
nodes="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/nodes.gpq",
edges="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/edges.gpq",
output:
component_population="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/component_population.svg",
component_map="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/network_map_by_component.png",
12 changes: 6 additions & 6 deletions workflow/transport-flood/flood_damages.smk
@@ -8,7 +8,7 @@ rule return_period_direct_damages:
input:
unsplit = rules.create_transport_network.output.edges, # for pre-intersection geometry
exposure = rules.rasterise_osm_network.output.geoparquet,
rehab_cost=lambda wildcards: f"config/rehab_costs/{wildcards.FILTER_SLUG.replace('filter-', '')}.csv",
rehab_cost=lambda wildcards: f"config/rehab_costs/{wildcards.FILTER_SLUG.split('-')[1]}.csv",
damage_curves="config/damage_curves/",
output:
damage_fraction = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/fraction_per_RP/{SLICE_SLUG}.geoparquet",
@@ -25,7 +25,7 @@ rule return_period_direct_damages:

"""
Test with:
snakemake --cores 1 results/direct_damages/egypt-latest_filter-road/hazard-aqueduct-river/EAD_and_cost_per_RP/slice-5.geoparquet
snakemake --cores 1 results/direct_damages/egypt-latest_filter-road-tertiary/hazard-aqueduct-river/EAD_and_cost_per_RP/slice-5.geoparquet
"""


@@ -47,20 +47,20 @@ rule event_set_direct_damages:
input:
unsplit = rules.create_transport_network.output.edges, # for pre-intersection geometry
exposure = rules.rasterise_osm_network.output.geoparquet,
rehab_cost=lambda wildcards: f"config/rehab_costs/{wildcards.FILTER_SLUG.replace('filter-', '')}.csv",
rehab_cost=lambda wildcards: f"config/rehab_costs/{wildcards.FILTER_SLUG.split('-')[1]}.csv",
damage_curves="config/damage_curves/",
output:
damage_fraction = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/fraction/{SLICE_SLUG}.gpq",
damage_cost = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/cost/{SLICE_SLUG}.gpq",
params:
network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''),
network_type=lambda wildcards: wildcards.FILTER_SLUG.split('-')[1],
hazard_type=lambda wildcards: config["hazard_types"][wildcards.HAZARD_SLUG.replace('hazard-', '')]
script:
"./event_set_direct_damages.py"

"""
Test with:
snakemake --cores 1 results/direct_damages/thailand-latest_filter-road/hazard-jba-event/cost/slice-5.gpq
snakemake --cores 1 results/direct_damages/thailand-latest_filter-road-primary/hazard-jba-event/cost/slice-5.gpq
"""


@@ -92,4 +92,4 @@
"""
Test with:
snakemake -c1 results/direct_damages/thailand-latest_filter-road/hazard-jba-event/cost.gpq
"""
"""
75 changes: 74 additions & 1 deletion workflow/transport/create_network.smk
@@ -1,3 +1,14 @@
"""
Generic network creation rules.
"""


import logging

import geopandas as gpd
import pandas as pd

import snkit


rule create_transport_network:
@@ -13,7 +24,8 @@ rule create_transport_network:
edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_edges.geoparquet"
params:
# determine the network type from the filter, e.g. road, rail
network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''),
# example FILTER_SLUG values might be 'filter-road-tertiary' or 'filter-rail'
network_type=lambda wildcards: wildcards.FILTER_SLUG.split('-')[1],
# pass in the slice number so we can label edges and nodes with their slice
# edge and node IDs should be unique across all slices
slice_number=lambda wildcards: int(wildcards.SLICE_SLUG.replace('slice-', ''))
@@ -25,3 +37,64 @@
Test with:
snakemake --cores all results/geoparquet/tanzania-mini_filter-road/processed/slice-0_edges.geoparquet
"""


def transport_network_paths_from_file(wildcards) -> dict[str, list[str]]:
    """
    Look up composite network components from file and return a dict of
    paths to their node and edge files.
    """
    df = pd.read_csv(
        config["composite_network"][wildcards.COMPOSITE],
        comment="#",
    )
    edge_paths = df.apply(
        lambda row:
            f"{wildcards.OUTPUT_DIR}/{row.infrastructure_dataset}_filter-{row.network_filter}/edges.gpq",
        axis=1
    )
    node_paths = [path.replace("edges.gpq", "nodes.gpq") for path in edge_paths]
    return {"component_nodes": node_paths, "component_edges": edge_paths}


rule create_composite_transport_network:
    """
    Stitch together a set of transport networks into a single network.
    The component networks should share a transport mode. May be used to
    create networks of spatially varying density.
    """
    input:
        unpack(transport_network_paths_from_file)
    output:
        composite_nodes = "{OUTPUT_DIR}/composite_network/{COMPOSITE}/nodes.gpq",
        composite_edges = "{OUTPUT_DIR}/composite_network/{COMPOSITE}/edges.gpq"
    run:
        logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO)

        logging.info("Concatenate nodes and edges")
        nodes = []
        for node_path in input.component_nodes:
            nodes.append(gpd.read_parquet(node_path))
        edges = []
        for edge_path in input.component_edges:
            edges.append(gpd.read_parquet(edge_path))

        network = snkit.network.Network(
            nodes=pd.concat(nodes).reset_index(drop=True),
            edges=pd.concat(edges).reset_index(drop=True),
        )

        logging.info("Labelling edge ends with from/to node ids")
        network = snkit.network.add_topology(network)

        logging.info("Labelling edges and nodes with network component ids")
        network = snkit.network.add_component_ids(network)

        logging.info("Writing network to disk")
        network.nodes.to_parquet(output.composite_nodes)
        network.edges.to_parquet(output.composite_edges)

"""
Test with:
snakemake -c1 -- results/composite_network/south-east-asia-road/edges.gpq
"""
20 changes: 10 additions & 10 deletions workflow/transport/create_rail_network.py
@@ -19,14 +19,14 @@

if __name__ == "__main__":

osm_edges_path = snakemake.input["edges"] # type: ignore
osm_nodes_path = snakemake.input["nodes"] # type: ignore
administrative_data_path = snakemake.input["admin"] # type: ignore
nodes_output_path = snakemake.output["nodes"] # type: ignore
edges_output_path = snakemake.output["edges"] # type: ignore
slice_number = int(snakemake.params["slice_number"]) # type: ignore

slice_number = int(slice_number)
osm_edges_path = snakemake.input["edges"]
osm_nodes_path = snakemake.input["nodes"]
administrative_data_path = snakemake.input["admin"]
dataset_name = snakemake.wildcards.DATASET
nodes_output_path = snakemake.output["nodes"]
edges_output_path = snakemake.output["edges"]
slice_number = int(snakemake.params["slice_number"])

osm_epsg = 4326

logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO)
@@ -58,7 +58,7 @@

# pass an id_prefix containing the slice number to ensure edges and nodes
# are uniquely identified across all slices in the network
network = create_network(edges=edges, nodes=nodes, id_prefix=f"{slice_number}")
network = create_network(edges=edges, nodes=nodes, id_prefix=f"{dataset_name}_{slice_number}")
logging.info(
f"Network contains {len(network.edges)} edges and {len(network.nodes)} nodes"
)
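Folding the dataset name into the prefix keeps ids unique when slices from several extracts are later composed; a minimal illustration with assumed values (the full id format is generated inside create_network and not shown in this diff):

dataset_name, slice_number = "thailand-latest", 0  # illustrative values
id_prefix = f"{dataset_name}_{slice_number}"       # 'thailand-latest_0'
# previously the prefix was just '0', identical across every dataset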
@@ -91,4 +91,4 @@
network.edges.to_parquet(edges_output_path)
network.nodes.to_parquet(nodes_output_path)

logging.info("Done creating network")
logging.info("Done creating network")