-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
issue #12 initial temporal and spatial splitter functions
- Loading branch information
1 parent
69414ca
commit fe2f3ac
Showing
1 changed file
with
110 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
"""Utilities to split openeo batch jobs for a given temporal and spatial extent. | ||
Used to avoid launching jobs in openeo that are too large. | ||
""" | ||
|
||
import math | ||
from datetime import datetime, timedelta | ||
from typing import Optional, Union | ||
|
||
import geopandas as gpd | ||
from geojson import GeoJSON | ||
from shapely.geometry import Polygon, box | ||
|
||
from openeo_gfmap import TemporalContext | ||
|
||
|
||
def split_polygon(
    geometries: gpd.GeoDataFrame, tile_size: Optional[int] = 10000
) -> dict:
    """Group the polygons of a GeoDataFrame by the tiles of a regular grid.

    The geometries are reprojected to EPSG:32631, their total bounding box is
    covered with square tiles of ``tile_size`` and, for every tile, the
    geometries intersecting that tile are collected in their own GeoDataFrame.

    Parameters
    ----------
    geometries: gpd.GeoDataFrame
        The polygons represented in a geopandas GeoDataFrame.
    tile_size: Optional[int]
        Edge length of a tile, expressed in the units of the projected CRS
        (meters for EPSG:32631). ``None`` selects the default of 10000,
        i.e. tiles of 10km x 10km.

    Returns
    -------
    splitted_geometries: dict
        Dictionary mapping ``"tile_{i}_{j}"`` (``i`` is the tile column,
        ``j`` the tile row, both counted from the lower-left corner of the
        bounding box) to the GeoDataFrame of geometries, in EPSG:32631, that
        intersect that tile. Empty when ``geometries`` has no rows.

    Raises
    ------
    ValueError
        If ``tile_size`` is not strictly positive.
    """
    if tile_size is None:
        tile_size = 10000
    if tile_size <= 0:
        raise ValueError(f"tile_size must be strictly positive, got {tile_size}")

    # The bounds of an empty frame are NaN, which math.ceil cannot handle.
    if geometries.empty:
        return {}

    # NOTE(review): the projected CRS is hard-coded; presumably the inputs are
    # expected to lie in or near UTM zone 31N - confirm for other regions.
    geometries = geometries.to_crs("EPSG:32631")

    # Determine the bounding box of the entire FeatureCollection
    min_x, min_y, max_x, max_y = geometries.total_bounds

    # Calculate the number of tiles in x and y directions
    num_tiles_x = math.ceil((max_x - min_x) / tile_size)
    num_tiles_y = math.ceil((max_y - min_y) / tile_size)

    splitted_geometries = {}
    for i in range(num_tiles_x):
        tile_min_x = min_x + i * tile_size
        # Clamp the upper edge so the last column does not overshoot the bbox.
        tile_max_x = min(tile_min_x + tile_size, max_x)

        for j in range(num_tiles_y):
            tile_min_y = min_y + j * tile_size
            tile_max_y = min(tile_min_y + tile_size, max_y)

            tile_polygon = box(tile_min_x, tile_min_y, tile_max_x, tile_max_y)

            # A geometry that contains the tile also intersects it, so a
            # single intersects() test selects everything relevant.
            # TODO: using intersects will lead to duplicates. Add a 'splitted'
            # flag to the gdf, so that only polygons with
            # splitted_flag == FALSE are selected?
            intersecting_polygons = geometries[geometries.intersects(tile_polygon)]

            # The separator keeps keys unique once an index reaches 10
            # (without it, tiles (1, 11) and (11, 1) would share one key).
            splitted_geometries[f"tile_{i}_{j}"] = intersecting_polygons

    return splitted_geometries
|
||
|
||
# TODO: make interval variable | ||
def split_temporal(
    temporal_extent: TemporalContext, interval: Optional[str] = "monthly"
) -> list:
    """Split a temporal extent into consecutive calendar-month chunks.

    The first chunk runs from the start date to the first day of the next
    month; every following chunk spans one calendar month, and the last chunk
    is cut off at the end date. Consecutive chunks share their boundary date.

    Parameters
    ----------
    temporal_extent: TemporalContext
        The full temporal extent that needs to be splitted. Its
        ``start_date`` and ``end_date`` are parsed as ``YYYY-MM-DD`` strings.
    interval: Optional[str] = 'monthly'
        The interval size used to split the temporal extent. Only
        ``"monthly"`` is implemented; ``None`` is treated as ``"monthly"``.

    Returns
    -------
    splitted_temporal_extent: list
        A list of ``[start, end]`` pairs of ``YYYY-MM-DD`` strings, in
        chronological order. Empty when the start date is not before the end
        date.

    Raises
    ------
    ValueError
        If ``interval`` is anything other than ``"monthly"`` or ``None``.
    """
    # Fail loudly rather than silently returning monthly chunks for an
    # interval the caller did not ask for.
    if interval not in (None, "monthly"):
        raise ValueError(
            f"Unsupported interval {interval!r}; only 'monthly' is implemented."
        )

    date_format = "%Y-%m-%d"
    start_date = datetime.strptime(temporal_extent.start_date, date_format)
    end_date = datetime.strptime(temporal_extent.end_date, date_format)

    splitted_temporal_extent = []
    current_start_date = start_date
    while current_start_date < end_date:
        # 32 days past the first of a month always lands in the next month;
        # snapping back to day 1 yields the first day of that next month.
        next_month_start = (
            current_start_date.replace(day=1) + timedelta(days=32)
        ).replace(day=1)

        current_end_date = min(next_month_start, end_date)

        splitted_temporal_extent.append(
            [
                current_start_date.strftime(date_format),
                current_end_date.strftime(date_format),
            ]
        )

        current_start_date = current_end_date

    return splitted_temporal_extent