Skip to content

Commit

Permalink
ENH: improve config and handling of weekly and biweekly mosaic periods (
Browse files Browse the repository at this point in the history
  • Loading branch information
theroggy authored May 11, 2024
1 parent 5009904 commit 6d27835
Show file tree
Hide file tree
Showing 29 changed files with 482 additions and 228 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

- Add task/action to automate periodic download of images (#67)
- Add support to calculate indexes locally (#55)
- Improve config and handling of "weekly" and "biweekly" raster image periods (#78)
- Make image profiles to be used in a classification configurable in a config file (#56)
- Run `bulk_zonal_stats` in low priority worker processes (#81)
- Use ruff instead of black and flake for formatting and linting (#57, #64, #65, #67)

### Bugs fixed
Expand Down
29 changes: 15 additions & 14 deletions bin_util/calc_periodic_mosaic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,42 @@
from pathlib import Path

import cropclassification.helpers.config_helper as conf
import cropclassification.preprocess._timeseries_helper as ts_helper
from cropclassification.util import mosaic_util


def main():
logging.basicConfig(level=logging.INFO)

# Init some variables
days_per_period = 7
roi_crs = 31370
# BEFL
start_date = datetime(2023, 2, 6)
end_date = datetime(2023, 7, 30)
dest_image_data_dir = Path("c:/temp/periodic_mosaic/roi_test")
roi_bounds = [20_000, 150_000, 260_000, 245_000]
dest_image_data_dir = Path("//dg3.be/alp/Datagis/satellite_periodic/BEFL")

# roi_test
start_date = datetime(2023, 3, 6)
end_date = datetime(2023, 3, 13)
roi_bounds = [161_400, 188_000, 161_900, 188_500]
dest_image_data_dir = Path("c:/temp/periodic_mosaic/roi_test")

imageprofiles_to_get = ["s1-dprvi-asc-weekly", "s1-dprvi-desc-weekly"]
imageprofiles_to_get = ["s2-agri-weekly"]

image_profiles_path = (
Path(__file__).resolve().parent.parent
/ "sample_marker_basedir/_config/image_profiles.ini"
)
imageprofiles = conf._get_image_profiles(image_profiles_path)

# As we want a weekly calculation, get first monday before the start and stop day.
start_date = ts_helper.get_monday(start_date)
end_date = ts_helper.get_monday(end_date)

_ = mosaic_util.calc_periodic_mosaic(
roi_bounds=[20_000, 150_000, 260_000, 245_000],
# roi_bounds=[161_400, 188_000, 161_900, 188_500],
roi_crs=31370,
roi_bounds=roi_bounds,
roi_crs=roi_crs,
start_date=start_date,
end_date=end_date,
days_per_period=days_per_period,
time_reducer="mean",
output_base_dir=dest_image_data_dir,
# imageprofiles_to_get=["s2-agri", "s2-ndvi"],
imageprofiles_to_get=["s1-dprvi-asc", "s1-dprvi-desc"],
imageprofiles_to_get=imageprofiles_to_get,
imageprofiles=imageprofiles,
force=False,
)
Expand Down
2 changes: 1 addition & 1 deletion ci/envs/latest.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: geofileops-latest
name: cropclass-latest
channels:
- conda-forge
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion ci/envs/minimal.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: geofileops-minimal
name: cropclass-minimal
channels:
- conda-forge
dependencies:
Expand Down
8 changes: 7 additions & 1 deletion cropclassification/calc_marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# Import geofileops here already: if tensorflow is loaded first, this leads to dll load errors
import geofileops as gfo # noqa: F401
import pyproj

from cropclassification.helpers import config_helper as conf
from cropclassification.helpers import dir_helper
Expand Down Expand Up @@ -176,8 +177,11 @@ def calc_marker_task(config_paths: List[Path], default_basedir: Path):
# Get the time series data (eg. S1, S2,...) to be used for the classification
# Result: data is put in files in timeseries_periodic_dir, in one file per
# date/period
period_name = conf.marker.get("period_name", "weekly")
timeseries_periodic_dir = conf.dirs.getpath("timeseries_periodic_dir")
timeseries_periodic_dir = timeseries_periodic_dir / imagedata_input_parcel_path.stem
timeseries_periodic_dir = (
timeseries_periodic_dir / f"{imagedata_input_parcel_path.stem}_{period_name}"
)
start_date_str = conf.marker["start_date_str"]
end_date_str = conf.marker["end_date_str"]
sensordata_to_use = conf.parse_sensordata_to_use(conf.marker["sensordata_to_use"])
Expand All @@ -186,6 +190,8 @@ def calc_marker_task(config_paths: List[Path], default_basedir: Path):
)
ts.calc_timeseries_data(
input_parcel_path=imagedata_input_parcel_path,
roi_bounds=tuple(conf.marker.getlistfloat("roi_bounds")),
roi_crs=pyproj.CRS.from_user_input(conf.marker.get("roi_crs")),
start_date_str=start_date_str,
end_date_str=end_date_str,
sensordata_to_get=sensordata_to_use,
Expand Down
8 changes: 1 addition & 7 deletions cropclassification/calc_periodic_mosaic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import cropclassification.helpers.config_helper as conf
from cropclassification.helpers import log_helper
import cropclassification.preprocess._timeseries_helper as ts_helper
from cropclassification.util import mosaic_util


Expand Down Expand Up @@ -52,17 +51,12 @@ def calc_periodic_mosaic_task(config_paths: List[Path], default_basedir: Path):
Path(conf.marker["image_profiles_config_filepath"])
)

# As we want a weekly calculation, get nearest monday for start and stop day
start_date = ts_helper.get_monday(start_date)
end_date = ts_helper.get_monday(end_date)

if not conf.calc_periodic_mosaic_params.getboolean("simulate"):
_ = mosaic_util.calc_periodic_mosaic(
roi_bounds=(161_000, 188_000, 162_000, 189_000),
roi_bounds=conf.calc_periodic_mosaic_params.getint("roi_bounds"),
roi_crs=conf.calc_periodic_mosaic_params.getint("roi_crs"),
start_date=start_date,
end_date=end_date,
days_per_period=conf.calc_periodic_mosaic_params.getint("days_per_period"),
output_base_dir=Path(
conf.calc_periodic_mosaic_params["dest_image_data_dir"]
),
Expand Down
7 changes: 3 additions & 4 deletions cropclassification/calc_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
Calculate the timeseries data per image on DIAS.
"""
Expand All @@ -17,8 +16,8 @@

from cropclassification.helpers import config_helper as conf
from cropclassification.helpers import log_helper
from cropclassification.util import date_util
from cropclassification.util import zonal_stats_bulk
from cropclassification.preprocess import _timeseries_helper as ts_helper

logger: logging.Logger

Expand All @@ -42,10 +41,10 @@ def calc_timeseries_task(config_paths: List[Path], default_basedir: Path):
test = conf.calc_timeseries_params.getboolean("test")

# As we want a weekly calculation, get the monday on or before the start and stop day
start_date = ts_helper.get_monday(
start_date = date_util.get_monday(
conf.marker["start_date_str"]
) # output: vb 2018_2_1 - maandag van week 2 van 2018
end_date = ts_helper.get_monday(conf.marker["end_date_str"])
end_date = date_util.get_monday(conf.marker["end_date_str"])

calc_year_start = start_date.year
calc_year_stop = end_date.year
Expand Down
10 changes: 7 additions & 3 deletions cropclassification/general.ini
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ reuse_last_run_dir_config = False
# The calc_periodic_mosaic_params section contains information to run a calc_periodic_mosaic task
[calc_periodic_mosaic_params]

# The days of the period to download the mosaics
days_per_period = 7
# The start date of the period we want to download the mosaic
start_date_str = MUST_OVERRIDE
# The end date of the period we want to download the mosaic
Expand All @@ -55,7 +53,7 @@ end_date_subtract_days = 3
# The path to download the mosaics
dest_image_data_dir = /tmp/satellite_periodic
# The image profiles to get
imageprofiles_to_get = s2-agri
imageprofiles_to_get = s2-agri-weekly
# The CRS of the roi
roi_crs = 31370
# Simulate, nothing is being downloaded
Expand Down Expand Up @@ -85,12 +83,18 @@ markertype = MUST_OVERRIDE
# Year to use, should be overridden when running
year = MUST_OVERRIDE

# The bounds of the roi to use to prepare images,...
roi_bounds = MUST_OVERRIDE
# The crs the roi bounds are specified in
roi_crs = MUST_OVERRIDE

# start date of timeseries data to use
# remarks: the monday on or before this date will be used + year will be replaced at run-time
start_date_str = ${year}-03-27
# end date of timeseries data to use
# remarks: end date is NOT inclusive + year will be replaced at run-time
end_date_str = ${year}-08-10

# negative buffer to apply to input parcels
buffer = 5
# minimum number of pixels that should be inside the buffered input parcels
Expand Down
4 changes: 3 additions & 1 deletion cropclassification/helpers/config_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,12 @@ def _get_image_profiles(image_profiles_path: Path) -> Dict[str, ImageProfile]:
satellite=profiles_config[profile].get("satellite"),
index_type=profiles_config[profile].get("index_type"),
image_source=profiles_config[profile].get("image_source"),
base_image_profile=profiles_config[profile].get("base_image_profile"),
collection=profiles_config[profile].get("collection"),
bands=profiles_config[profile].getlist("bands"),
time_reducer=profiles_config[profile].get("time_reducer"),
period_name=profiles_config[profile].get("period_name"),
period_days=profiles_config[profile].getint("period_days"),
base_image_profile=profiles_config[profile].get("base_image_profile"),
max_cloud_cover=profiles_config[profile].getfloat("max_cloud_cover"),
process_options=profiles_config[profile].getdict("process_options"),
job_options=profiles_config[profile].getdict("job_options"),
Expand Down
33 changes: 14 additions & 19 deletions cropclassification/preprocess/_timeseries_calc_openeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import logging
from pathlib import Path
import tempfile
from typing import Dict, List
from typing import Dict, List, Optional, Tuple

import geofileops as gfo
import pyproj
import shapely

from cropclassification.helpers import config_helper as conf
from cropclassification.util import mosaic_util
Expand All @@ -17,6 +19,8 @@

def calculate_periodic_timeseries(
input_parcel_path: Path,
roi_bounds: Tuple[float, float, float, float],
roi_crs: Optional[pyproj.CRS],
start_date: datetime,
end_date: datetime,
imageprofiles_to_get: List[str],
Expand All @@ -31,16 +35,19 @@ def calculate_periodic_timeseries(
args
imageprofiles_to_get: an array with data you want to be calculated.
"""
# As we want a weekly calculation, get nearest monday for start and stop day
days_per_period = 7
roi_info = gfo.get_layerinfo(input_parcel_path)
info = gfo.get_layerinfo(input_parcel_path)
if info.crs is not None and not info.crs.equals(roi_crs):
raise ValueError(f"parcel crs ({info.crs}) <> roi crs ({roi_crs})")
if not shapely.box(*info.total_bounds).within(shapely.box(*roi_bounds)):
raise ValueError(
f"parcel bounds ({info.total_bounds}) not within roi_bounds ({roi_bounds})"
)

periodic_images_result = mosaic_util.calc_periodic_mosaic(
roi_bounds=roi_info.total_bounds,
roi_crs=roi_info.crs,
roi_bounds=roi_bounds,
roi_crs=roi_crs,
start_date=start_date,
end_date=end_date,
days_per_period=days_per_period,
output_base_dir=dest_image_data_dir,
imageprofiles_to_get=imageprofiles_to_get,
imageprofiles=imageprofiles,
Expand All @@ -64,15 +71,3 @@ def calculate_periodic_timeseries(
engine="pyqgis",
nb_parallel=nb_parallel,
)

"""
for image_path in periodic_images:
output_path = (
dest_data_dir / f"{input_parcel_path.stem}__{image_path.stem}.gpkg"
)
geoops_util.zonal_stats(
input_vector_path=input_parcel_path,
input_raster_path=image_path,
output_path=output_path,
)
"""
22 changes: 1 addition & 21 deletions cropclassification/preprocess/_timeseries_helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
Calculates periodic timeseries for input parcels.
"""
Expand All @@ -8,7 +7,7 @@
import gc
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional

import geofileops as gfo
import numpy as np
Expand All @@ -18,9 +17,6 @@
import cropclassification.helpers.config_helper as conf
import cropclassification.helpers.pandas_helper as pdh

# -------------------------------------------------------------
# First define/init some general variables/constants
# -------------------------------------------------------------
# Get a logger...
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -641,19 +637,3 @@ def get_fileinfo_timeseries_periods(path: Path) -> dict:
}

return get_fileinfo_timeseries(path)


def get_monday(date: Union[str, datetime]) -> datetime:
    """
    Get the monday of the week the date provided falls in.

    If the date is a monday, that same day is returned; otherwise the last monday
    before it is returned. It is used to adapt start_date and end_date so they are
    mondays, which makes it easier to reuse timeseries data.

    Args:
        date: the date, either a datetime or a string in format %Y-%m-%d.

    Returns:
        datetime: the monday, at midnight. Any time-of-day in the input is dropped.

    Raises:
        ValueError: if date is a string that does not match format %Y-%m-%d.
    """
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    # weekday() is 0 for monday, so subtracting it from the ordinal day number gives
    # the monday of the same week, also across a year boundary. This avoids a
    # strftime/strptime round trip on "%Y_%W" week numbers.
    return datetime.fromordinal(date.toordinal() - date.weekday())
19 changes: 10 additions & 9 deletions cropclassification/preprocess/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,23 @@
import logging
import os
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Optional, Tuple

import pyproj

import cropclassification.helpers.config_helper as conf
import cropclassification.helpers.pandas_helper as pdh
import cropclassification.preprocess._timeseries_helper as ts_helper

# First define/init some general variables/constants
# -------------------------------------------------------------
from cropclassification.util import date_util

# Get a logger...
logger = logging.getLogger(__name__)

# The real work
# -------------------------------------------------------------


def calc_timeseries_data(
input_parcel_path: Path,
roi_bounds: Tuple[float, float, float, float],
roi_crs: Optional[pyproj.CRS],
start_date_str: str,
end_date_str: str,
sensordata_to_get: Dict[str, conf.SensorData],
Expand All @@ -47,10 +46,10 @@ def calc_timeseries_data(
dest_data_dir.mkdir(parents=True, exist_ok=True)

# As we want a weekly calculation, get the monday on or before the start and stop day
start_date = ts_helper.get_monday(
start_date = date_util.get_monday(
start_date_str
) # output: vb 2018_2_1 - maandag van week 2 van 2018
end_date = ts_helper.get_monday(end_date_str)
end_date = date_util.get_monday(end_date_str)

logger.info(
f"Start date {start_date_str} converted to monday before: {start_date}, end "
Expand Down Expand Up @@ -84,6 +83,8 @@ def calc_timeseries_data(

ts_calc_openeo.calculate_periodic_timeseries(
input_parcel_path=input_parcel_path,
roi_bounds=roi_bounds,
roi_crs=roi_crs,
start_date=start_date,
end_date=end_date,
imageprofiles_to_get=sensordata_to_get_openeo,
Expand Down
Loading

0 comments on commit 6d27835

Please sign in to comment.