From 74bdf2e39419133f33d8972711f880a449b6dd6b Mon Sep 17 00:00:00 2001 From: Daniel Baston Date: Sun, 17 Dec 2023 13:46:16 -0500 Subject: [PATCH] Propagate raster nodata to missing results in output --- src/operation.h | 35 ++++++++++++++++++++++++++--------- src/raster_stats.h | 3 +-- test/test_cli.py | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/src/operation.h b/src/operation.h index b4373b2b..8a0b6b96 100644 --- a/src/operation.h +++ b/src/operation.h @@ -110,9 +110,25 @@ class Operation virtual void set_result(const StatsRegistry& reg, const Feature& f_in, Feature& f_out) const { static const StatsRegistry::RasterStatsVariant empty_stats = RasterStats(); + + constexpr bool write_if_missing = true; // should we set attribute values if the feature did not intersect the raster? + if (!write_if_missing && !reg.contains(f_in, *this)) { + return; + } + const auto& stats = reg.contains(f_in, *this) ? reg.stats(f_in, *this) : empty_stats; - auto missing = std::numeric_limits::quiet_NaN(); + // FIXME don't read an empty box every time, maybe cache this in the source? + auto empty_rast = values->read_box(Box::make_empty()); + + auto missing = std::visit([](const auto& r) { + std::variant ret = std::numeric_limits::quiet_NaN(); + if (r->has_nodata()) { + ret = r->nodata(); + } + return ret; + }, + empty_rast); if (stat == "mean") { std::visit([&f_out, this](const auto& x) { f_out.set(m_field_names[0], x.mean()); }, stats); @@ -125,13 +141,13 @@ class Operation } else if (stat == "weighted_sum") { std::visit([&f_out, this](const auto& x) { f_out.set(m_field_names[0], x.weighted_sum()); }, stats); } else if (stat == "min") { - std::visit([&f_out, &missing, this](const auto& x) { f_out.set(m_field_names[0], x.min().value_or(missing)); }, stats); + std::visit([&f_out, this](const auto& x, const auto& m) { f_out.set(m_field_names[0], x.min().value_or(m)); }, stats, missing); } else if (stat == "max") { - std::visit([&f_out, &missing, this](const auto& x) { f_out.set(m_field_names[0], x.max().value_or(missing)); }, stats); + std::visit([&f_out, this](const auto& x, const auto& m) { f_out.set(m_field_names[0], x.max().value_or(m)); }, stats, missing); } else if (stat == "majority" || stat == "mode") { - std::visit([&f_out, &missing, this](const auto& x) { f_out.set(m_field_names[0], x.mode().value_or(missing)); }, stats); + std::visit([&f_out, this](const auto& x, const auto& m) { f_out.set(m_field_names[0], x.mode().value_or(m)); }, stats, missing); } else if (stat == "minority") { - std::visit([&f_out, &missing, this](const auto& x) { f_out.set(m_field_names[0], x.minority().value_or(missing)); }, stats); + std::visit([&f_out, this](const auto& x, const auto& m) { f_out.set(m_field_names[0], x.minority().value_or(m)); }, stats, missing); } else if (stat == "variety") { std::visit([&f_out, this](const auto& x) { f_out.set(m_field_names[0], x.variety()); }, stats); } else if (stat == "stdev") { @@ -145,14 +161,15 @@ class Operation } else if (stat == "coefficient_of_variation") { std::visit([&f_out, this](const auto& x) { f_out.set(m_field_names[0], x.coefficient_of_variation()); }, stats); } else if (stat == "median") { - std::visit([&f_out, &missing, this](const auto& x) { f_out.set(m_field_names[0], x.quantile(0.5).value_or(missing)); }, stats); + std::visit([&f_out, this](const auto& x, const auto& m) { f_out.set(m_field_names[0], x.quantile(0.5).value_or(m)); }, stats, missing); } else if (stat == "quantile") { - std::visit([&f_out, &missing, this](const auto& x) { + std::visit([&f_out, this](const auto& x, const auto& m) { for (std::size_t i = 0; i < m_quantiles.size(); i++) { - f_out.set(m_field_names[i], x.quantile(m_quantiles[i]).value_or(missing)); + f_out.set(m_field_names[i], x.quantile(m_quantiles[i]).value_or(m)); } }, - stats); + stats, + missing); } else if (stat == "frac") { std::visit([&f_out](const auto& x) { for (const auto& value : x) { diff --git a/src/raster_stats.h b/src/raster_stats.h index 9364a854..4dac88a8 100644 --- a/src/raster_stats.h +++ b/src/raster_stats.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2022 ISciences, LLC. +// Copyright (c) 2018-2023 ISciences, LLC. // All rights reserved. // // This software is licensed under the Apache License, Version 2.0 (the "License"). @@ -15,7 +15,6 @@ #define EXACTEXTRACT_RASTER_STATS_H #include -#include #include #include #include diff --git a/test/test_cli.py b/test/test_cli.py index 794781f0..a8d57746 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -29,8 +29,6 @@ def runner(*args, **kwargs): cmd = [str(x) for x in ["./exactextract", "-o", output_fname] + arglist] - # print(' '.join(cmd)) - subprocess.run(cmd, check=True) with open(output_fname, "r") as f: @@ -45,7 +43,7 @@ def write_raster(tmp_path): files = [] - def writer(data): + def writer(data, nodata=None): fname = str(tmp_path / f"raster{len(files)}.tif") @@ -65,6 +63,9 @@ def writer(data): ds.GetRasterBand(1).WriteArray(data) + if nodata: + ds.GetRasterBand(1).SetNoDataValue(nodata) + return fname return writer @@ -166,12 +167,40 @@ def test_feature_not_intersecting_raster(strategy, run, write_raster, write_feat fid="id", raster=f"value:{write_raster(data)}", stat=["count(value)", "mean(value)"], + strategy=strategy, ) assert len(rows) == 1 assert rows[0] == {"id": "1", "value_count": "0", "value_mean": "nan"} +@pytest.mark.parametrize("strategy", ("feature-sequential", "raster-sequential")) +@pytest.mark.parametrize("dtype,nodata", [(np.float32, None), (np.int32, -999)]) +def test_feature_intersecting_nodata( + strategy, run, write_raster, write_features, dtype, nodata +): + + data = np.full((4, 3), nodata or np.nan, dtype=dtype) + + rows = run( + polygons=write_features( + {"id": 1, "geom": "POLYGON ((0.5 0.5, 2.5 0.5, 2.5 2, 0.5 2, 0.5 0.5))"} + ), + fid="id", + raster=f"metric:{write_raster(data, nodata)}", + stat=["count(metric)", "mean(metric)", "mode(metric)"], + strategy=strategy, + ) + + assert len(rows) == 1 + assert rows[0] == { + "id": "1", + "metric_count": "0", + "metric_mean": "nan", + "metric_mode": str(nodata) if nodata else "nan", + } + + @pytest.mark.parametrize("strategy", ("feature-sequential", "raster-sequential")) def test_include_cols(strategy, run, write_raster, write_features):