Skip to content

Commit

Permalink
Add bulk zonal stats option using pyqgis - first version (theroggy#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
theroggy authored May 4, 2023
1 parent c6ff7a1 commit 8c64963
Show file tree
Hide file tree
Showing 12 changed files with 519 additions and 246 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Improvements

- Improve performance of zonal_stats_bulk (#38)
- Use black to comply to pep8 + minor general improvements (#13)
- Upgrade all dependencies (#12)
- Add support for pandas 2.0 (#41)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Once you have anaconda installed, you can open an anaconda terminal window and f
```
2. Install the dependencies for the crop classification scripts:
```
conda install python=3.9 geopandas geofileops "h5py<3" openeo psutil rasterio rasterstats scikit-learn
conda install python=3.9 geopandas geofileops "h5py<3" openeo psutil qgis rasterio rasterstats scikit-learn
```
3. If it was the first time you installed anaconda/geopandas, you might have to restart your computer to proceed
4. Start the anaconda terminal window again and activate the environment
Expand Down
12 changes: 6 additions & 6 deletions cropclassification/calc_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ def calc_timeseries_task(config_paths: List[Path], default_basedir: Path):
try:
images_bands = [(path, ["VV", "VH"]) for path in input_image_paths]
zonal_stats_bulk.zonal_stats(
features_path=input_features_path,
vector_path=input_features_path,
id_column=conf.columns["id"],
images_bands=images_bands,
rasters_bands=images_bands,
output_dir=output_dir,
temp_dir=temp_dir,
log_dir=log_dir,
Expand Down Expand Up @@ -248,9 +248,9 @@ def calc_timeseries_task(config_paths: List[Path], default_basedir: Path):
bands = conf.timeseries.getlist("s2bands")
images_bands = [(path, bands) for path in input_image_paths]
zonal_stats_bulk.zonal_stats(
features_path=input_features_path,
vector_path=input_features_path,
id_column=conf.columns["id"],
images_bands=images_bands,
rasters_bands=images_bands,
output_dir=output_dir,
temp_dir=temp_dir,
log_dir=log_dir,
Expand Down Expand Up @@ -292,9 +292,9 @@ def calc_timeseries_task(config_paths: List[Path], default_basedir: Path):
try:
images_bands = [(path, ["VV", "VH"]) for path in input_image_paths]
zonal_stats_bulk.zonal_stats(
features_path=input_features_path,
vector_path=input_features_path,
id_column=conf.columns["id"],
images_bands=images_bands,
rasters_bands=images_bands,
output_dir=output_dir,
temp_dir=temp_dir,
log_dir=log_dir,
Expand Down
4 changes: 3 additions & 1 deletion cropclassification/predict/classification_keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,9 @@ def predict_proba(
logger.info(f"Resulting Columns for predicting data: {parcel_data_df.columns}")

# Load the classifier and predict
model = tf.keras.models.load_model(classifier_path)
model = tf.keras.models.load_model(str(classifier_path))
if model is None:
raise RuntimeError(f"Error loading model {classifier_path}")
logger.info(f"Predict classes with probabilities: {len(parcel_df.index)} rows")
class_proba = model.predict(parcel_data_df)
logger.info("Predict classes with probabilities ready")
Expand Down
9 changes: 4 additions & 5 deletions cropclassification/preprocess/_timeseries_calc_openeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,12 @@ def calculate_periodic_timeseries(
if temp_dir == "None":
temp_dir = Path(tempfile.gettempdir())
zonal_stats_bulk.zonal_stats(
features_path=input_parcel_path,
vector_path=input_parcel_path,
id_column=conf.columns["id"],
images_bands=images_bands,
rasters_bands=images_bands,
output_dir=dest_data_dir,
temp_dir=temp_dir,
log_dir=conf.dirs.getpath("log_dir"),
log_level=conf.general.get("log_level"),
stats=["count", "mean", "median", "std", "min", "max"],
engine="pyqgis",
)

"""
Expand Down
49 changes: 21 additions & 28 deletions cropclassification/util/zonal_stats_bulk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import List, Literal, Optional, Tuple

from ._raster_helper import * # noqa: F401, F403


def zonal_stats(
features_path: Path,
vector_path: Path,
id_column: str,
images_bands: List[Tuple[Path, List[str]]],
rasters_bands: List[Tuple[Path, List[str]]],
output_dir: Path,
temp_dir: Path,
log_dir: Path,
log_level: Union[str, int],
stats: Literal["count", "mean", "median", "std", "min", "max"],
cloud_filter_band: Optional[str] = None,
calc_bands_parallel: bool = True,
engine: str = "rasterstats",
Expand All @@ -21,44 +19,39 @@ def zonal_stats(
Calculate zonal statistics.
Args:
features_path (Path): _description_
vector_path (Path): _description_
id_column (str): _description_
images_bands (List[Tuple[Path, List[str]]]): _description_
rasters_bands (List[Tuple[Path, List[str]]]): _description_
output_dir (Path): _description_
temp_dir (Path): _description_
log_dir (Path): _description_
log_level (Union[str, int]): _description_
force (bool, optional): _description_. Defaults to False.
Raises:
Exception: _description_
"""
if engine == "pyqgis":
from _zonal_stats_bulk_pyqgis import zonal_stats
if cloud_filter_band is not None:
raise ValueError(
'cloud_filter_band parameter not supported for engine "pyqgis"'
)
from . import _zonal_stats_bulk_pyqgis

return zonal_stats(
features_path=features_path,
id_column=id_column,
images_bands=images_bands,
return _zonal_stats_bulk_pyqgis.zonal_stats(
vector_path=vector_path,
rasters_bands=rasters_bands,
output_dir=output_dir,
temp_dir=temp_dir,
log_dir=log_dir,
log_level=log_level,
cloud_filter_band=cloud_filter_band,
calc_bands_parallel=calc_bands_parallel,
stats=stats,
columns=[id_column],
force=force,
)
elif engine == "rasterstats":
from _zonal_stats_bulk_rs import zonal_stats
from . import _zonal_stats_bulk_rs

return zonal_stats(
features_path=features_path,
return _zonal_stats_bulk_rs.zonal_stats(
vector_path=vector_path,
id_column=id_column,
images_bands=images_bands,
rasters_bands=rasters_bands,
output_dir=output_dir,
temp_dir=temp_dir,
log_dir=log_dir,
log_level=log_level,
stats=stats,
cloud_filter_band=cloud_filter_band,
calc_bands_parallel=calc_bands_parallel,
force=force,
Expand Down
37 changes: 21 additions & 16 deletions cropclassification/util/zonal_stats_bulk/_general_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime
import logging
from pathlib import Path
from typing import Optional
from typing import Optional, Union

logger = logging.getLogger(__name__)

Expand All @@ -11,7 +11,7 @@ def _format_output_path(
image_path: Path,
output_dir: Path,
orbit_properties_pass: Optional[str],
band: Optional[str],
band: Optional[Union[str, int]],
) -> Path:
"""
Prepare the output path.
Expand Down Expand Up @@ -40,9 +40,9 @@ def _format_output_path(
def _format_progress_message(
nb_todo: int,
nb_done_total: int,
nb_done_latestbatch: int,
start_time: datetime,
start_time_latestbatch: datetime,
nb_done_latestbatch: Optional[int] = None,
start_time_latestbatch: Optional[datetime] = None,
) -> str:
"""
Returns a progress message based on the input.
Expand All @@ -55,9 +55,11 @@ def _format_progress_message(
start_time_latestbatch: datetime the latest batch started
"""
time_passed_s = (datetime.now() - start_time).total_seconds()
time_passed_latestbatch_s = (
datetime.now() - start_time_latestbatch
).total_seconds()
time_passed_latestbatch_s = None
if start_time_latestbatch is not None:
time_passed_latestbatch_s = (
datetime.now() - start_time_latestbatch
).total_seconds()

# Calculate the overall progress
large_number = 9999999999
Expand All @@ -68,17 +70,20 @@ def _format_progress_message(
hours_to_go = (int)((nb_todo - nb_done_total) / nb_per_hour)
min_to_go = (int)((((nb_todo - nb_done_total) / nb_per_hour) % 1) * 60)

# Calculate the speed of the latest batch
if time_passed_latestbatch_s > 0:
# Format message
message = (
f"{hours_to_go}:{min_to_go} left for {nb_todo-nb_done_total} todo at "
f"{nb_per_hour:0.0f}/h"
)
# Add speed of the latest batch to message if appropriate
if (
time_passed_latestbatch_s is not None
and nb_done_latestbatch is not None
and time_passed_latestbatch_s > 0
):
nb_per_hour_latestbatch = (
nb_done_latestbatch / time_passed_latestbatch_s
) * 3600
else:
nb_per_hour_latestbatch = large_number
message = f"{message} ({nb_per_hour_latestbatch:0.0f}/h last batch)"

# Return formatted message
message = (
f"{hours_to_go}:{min_to_go} left for {nb_todo-nb_done_total} todo at "
f"{nb_per_hour:0.0f}/h ({nb_per_hour_latestbatch:0.0f}/h last batch)"
)
return message
Loading

0 comments on commit 8c64963

Please sign in to comment.