Skip to content

Commit

Permalink
issue #12 initial temporal and spatial splitter functions
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentVerelst committed Jan 30, 2024
1 parent 69414ca commit fe2f3ac
Showing 1 changed file with 110 additions and 0 deletions.
110 changes: 110 additions & 0 deletions src/openeo_gfmap/utils/job_splitting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Utilities to split openeo batch jobs for a given temporal and spatial extent.
Used to avoid launching jobs in openeo that are too large.
"""

import math
from datetime import datetime, timedelta
from typing import Optional, Union

import geopandas as gpd
from geojson import GeoJSON
from shapely.geometry import Polygon, box

from openeo_gfmap import TemporalContext


def split_polygon(
    geometries: gpd.GeoDataFrame, tile_size: Optional[int] = 10000
) -> dict:
    """Split a GeoDataFrame of polygons into one GeoDataFrame per grid tile.

    The geometries are reprojected to EPSG:32631, their total bounding box is
    covered with a regular grid of square tiles, and for every tile the
    geometries that intersect it are collected.

    NOTE: the projected CRS is hard-coded to EPSG:32631 (UTM zone 31N), so the
    tile size is only an accurate metric distance for geometries in or near
    that zone.

    Parameters
    ----------
    geometries: gpd.GeoDataFrame
        The polygons represented in a geopandas GeoDataFrame.
    tile_size: Optional[int]
        The tile edge length in meters used to split the polygons.
        By default the splitting is done on tiles of 10km x 10km; passing
        ``None`` selects that default as well.

    Returns
    -------
    splitted_geometries: dict
        Dictionary mapping ``"tile_<i><j>"`` (``i`` = column index along x,
        ``j`` = row index along y) to the GeoDataFrame of geometries, in
        EPSG:32631, that intersect that tile. A geometry spanning several
        tiles appears in each of them. The indices are zero-padded to a common
        width so that keys stay unique on grids with more than 10 tiles along
        an axis; on smaller grids the keys are plain ``tile_<i><j>``.
        An empty dictionary is returned when the input has no bounds
        (e.g. an empty GeoDataFrame).

    Raises
    ------
    ValueError
        If ``tile_size`` is not strictly positive.
    """
    if tile_size is None:
        tile_size = 10000
    if tile_size <= 0:
        raise ValueError(f"tile_size must be strictly positive, got {tile_size}")

    geometries = geometries.to_crs("EPSG:32631")

    # Determine the bounding box of the entire FeatureCollection
    min_x, min_y, max_x, max_y = geometries.total_bounds

    # total_bounds is all-NaN when there is nothing to bound; there is nothing
    # to split in that case and math.ceil would fail on NaN.
    if any(math.isnan(value) for value in (min_x, min_y, max_x, max_y)):
        return {}

    # Calculate the number of tiles in x and y directions
    num_tiles_x = math.ceil((max_x - min_x) / tile_size)
    num_tiles_y = math.ceil((max_y - min_y) / tile_size)

    # Pad both indices to the same number of digits: without padding,
    # (i=1, j=11) and (i=11, j=1) would both produce the key "tile_111".
    index_width = len(str(max(num_tiles_x, num_tiles_y, 1) - 1))

    splitted_geometries = {}
    for i in range(num_tiles_x):
        tile_min_x = min_x + i * tile_size
        # Clamp the absolute coordinate (not the offset) to the bounding box.
        tile_max_x = min(min_x + (i + 1) * tile_size, max_x)

        for j in range(num_tiles_y):
            tile_min_y = min_y + j * tile_size
            tile_max_y = min(min_y + (j + 1) * tile_size, max_y)

            # Convert the bbox of the tile to a Polygon
            tile_polygon = box(tile_min_x, tile_min_y, tile_max_x, tile_max_y)

            # Select the polygons that intersect the tile. A geometry that
            # contains the tile also intersects it, so a separate
            # ``contains`` test is not needed.
            # TODO: using intersects will lead to duplicates. Add a 'splitted' flag to the gdf, so that only polygons with splitted_flag == FALSE are selected?
            intersecting_polygons = geometries[geometries.intersects(tile_polygon)]

            tile_key = f"tile_{i:0{index_width}d}{j:0{index_width}d}"
            splitted_geometries[tile_key] = intersecting_polygons

    return splitted_geometries


# TODO: make interval variable
def split_temporal(
    temporal_extent: "TemporalContext", interval: Optional[str] = "monthly"
) -> list:
    """Split a temporal extent into consecutive calendar-month chunks.

    The first chunk starts at ``temporal_extent.start_date``. Every chunk ends
    on the first day of the following month, except the last one, which ends
    at ``temporal_extent.end_date``. Consecutive chunks share their boundary
    date (the end of one chunk is the start of the next).

    Parameters
    ----------
    temporal_extent: TemporalContext
        The full temporal extent that needs to be split. Its ``start_date``
        and ``end_date`` attributes are parsed as ``YYYY-MM-DD`` strings.
    interval: Optional[str] = 'monthly'
        The interval size used to split the temporal extent. Only
        ``"monthly"`` is implemented; ``None`` selects it as well.

    Returns
    -------
    splitted_temporal_extent: list
        A chronologically ordered list of ``[start, end]`` pairs, each date
        formatted as a ``YYYY-MM-DD`` string. The list is empty when
        ``start_date`` is not strictly before ``end_date``.

    Raises
    ------
    ValueError
        If ``interval`` is not supported, or if one of the dates does not
        match the ``YYYY-MM-DD`` format.
    """
    if interval is None:
        interval = "monthly"
    if interval != "monthly":
        # Fail loudly instead of silently returning monthly chunks for an
        # interval the caller did not ask for.
        raise ValueError(
            f"Unsupported interval {interval!r}: only 'monthly' is implemented."
        )

    date_format = "%Y-%m-%d"
    chunk_start = datetime.strptime(temporal_extent.start_date, date_format)
    end_date = datetime.strptime(temporal_extent.end_date, date_format)

    splitted_temporal_extent = []
    while chunk_start < end_date:
        # Day 1 of a month plus 32 days always falls in the next month;
        # snapping back to day 1 yields the first day of that next month.
        next_month_start = (
            chunk_start.replace(day=1) + timedelta(days=32)
        ).replace(day=1)

        # Never run past the requested end of the extent.
        chunk_end = min(next_month_start, end_date)

        splitted_temporal_extent.append(
            [
                chunk_start.strftime(date_format),
                chunk_end.strftime(date_format),
            ]
        )

        chunk_start = chunk_end

    return splitted_temporal_extent

0 comments on commit fe2f3ac

Please sign in to comment.