Skip to content

Commit

Permalink
First draft of top level masking functions for bitmask and enumerated…
Browse files Browse the repository at this point in the history
… masks
  • Loading branch information
alexgleith committed Nov 20, 2024
1 parent a00c882 commit 479dc32
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 0 deletions.
14 changes: 14 additions & 0 deletions odc/geo/_xr_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
resolve_fill_value,
resolve_nodata,
)
from .masking import (
bits_to_bool,
enum_to_bool,
scale_and_offset,
scale_and_offset_dataset,
)
from .overlap import compute_output_geobox
from .roi import roi_is_empty
from .types import Nodata, Resolution, SomeNodata, SomeResolution, SomeShape, xy_
Expand Down Expand Up @@ -1053,6 +1059,12 @@ def nodata(self, value: Nodata):

colorize = _wrap_op(colorize)

scale_and_offset = _wrap_op(scale_and_offset)

bits_to_bool = _wrap_op(bits_to_bool)

enum_to_bool = _wrap_op(enum_to_bool)

if have.rasterio:
write_cog = _wrap_op(write_cog)
to_cog = _wrap_op(to_cog)
Expand Down Expand Up @@ -1093,6 +1105,8 @@ def to_rgba(
) -> xarray.DataArray:
return to_rgba(self._xx, bands=bands, vmin=vmin, vmax=vmax)

scale_and_offset = _wrap_op(scale_and_offset_dataset)


ODCExtensionDs.to_rgba.__doc__ = to_rgba.__doc__

Expand Down
121 changes: 121 additions & 0 deletions odc/geo/masking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# This file is part of the Open Data Cube, see https://opendatacube.org for more information
#
# Copyright (c) 2015-2020 ODC Contributors
# SPDX-License-Identifier: Apache-2.0
"""
Functions around supporting cloud masking.
"""

from xarray import DataArray, Dataset


def bits_to_bool(
    xx: DataArray,
    bits: list[int] | None = None,
    bitflags: int | None = None,
    invert: bool = False,
) -> DataArray:
    """
    Convert integer array into boolean array using bitmasks.

    An element of the result is ``True`` when at least one of the selected
    bits is set in the corresponding element of ``xx``. Exactly one of
    ``bits`` or ``bitflags`` must be supplied.

    :param xx: DataArray with integer values
    :param bits: List of bit positions to convert to a bitflag mask (e.g. [0, 1, 2] -> 0b111)
    :param bitflags: Integer value with bits set that will be used to extract the boolean mask (e.g. 0b00011000)
    :param invert: Invert the mask
    :return: DataArray with boolean values
    :raises ValueError: If neither, or both, of ``bits`` and ``bitflags`` are provided
    """
    # Raise real exceptions rather than ``assert``: assertions are removed
    # when Python runs with ``-O``, which would silently skip validation.
    if bits is None and bitflags is None:
        raise ValueError("Either bits or bitflags must be provided")
    if bits is not None and bitflags is not None:
        raise ValueError("Only one of bits or bitflags can be provided")

    if bits is not None:
        # Fold the bit positions into a single integer mask.
        bitflags = 0
        for bit in bits:
            bitflags |= 1 << bit

    mask = (xx & bitflags) != 0
    return ~mask if invert else mask


def enum_to_bool(xx: DataArray, values: list, invert: bool = False) -> DataArray:
    """
    Build a boolean array marking the elements of ``xx`` found in ``values``.

    :param xx: DataArray holding enumerated values
    :param values: Values to match; matching elements become ``True``
    :param invert: When set, flip the result so matching elements become ``False``
    :return: DataArray with boolean values
    """
    matched = xx.isin(values)
    return ~matched if invert else matched


def scale_and_offset(
    xx: DataArray,
    scale: float | None = None,
    offset: float | None = None,
    ignore_missing: bool = False,
) -> DataArray:
    """
    Apply scale and offset to the DataArray. Leave scale and offset blank to use
    the values from the DataArray's attrs.

    The result is ``xx * scale + offset``. When only one of the two values can
    be resolved, the other falls back to its neutral value (``scale=1.0`` or
    ``offset=0.0``).

    :param xx: DataArray with integer values
    :param scale: Scale factor; when ``None``, ``xx.attrs["scales"]`` is used if present
    :param offset: Offset; when ``None``, ``xx.attrs["offsets"]`` is used if present
    :param ignore_missing: When neither value can be resolved, return ``xx``
        unchanged instead of raising
    :return: DataArray with scaled and offset values
    :raises ValueError: If neither scale nor offset is provided or found in
        attrs and ``ignore_missing`` is false
    """
    # "scales" and "offsets" are the attribute names the original author
    # notes as being used by GDAL.
    if scale is None:
        scale = xx.attrs.get("scales")
    if offset is None:
        offset = xx.attrs.get("offsets")

    if scale is None and offset is None:
        if ignore_missing:
            return xx
        raise ValueError(
            "Scale and offset not provided and not found in attrs.scales and attrs.offsets"
        )

    # Only one value was resolved: substitute the neutral value for the other.
    if scale is None:
        scale = 1.0
    if offset is None:
        offset = 0.0

    return xx * scale + offset


def scale_and_offset_dataset(
    xx: Dataset, scale: float | None = None, offset: float | None = None
) -> Dataset:
    """
    Apply scale and offset to the Dataset. Leave scale and offset blank to use
    the values from each DataArray's attrs.

    Data variables for which no scale or offset can be resolved are passed
    through unchanged. The input Dataset is not modified; a new Dataset is
    returned.

    :param xx: Dataset with integer values
    :param scale: Scale factor
    :param offset: Offset
    :return: Dataset with scaled and offset values
    """
    # Shallow copy gives a new Dataset container, so re-assigning variables
    # below does not overwrite the variables of the caller's Dataset.
    scaled = xx.copy()

    for var in xx.data_vars:
        scaled[var] = scale_and_offset(xx[var], scale, offset, ignore_missing=True)

    return scaled
52 changes: 52 additions & 0 deletions tests/test_masking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from odc.geo.masking import bits_to_bool, enum_to_bool, scale_and_offset

from xarray import DataArray

# 2x2 bit-flag fixture. Layout:
#   top left: cloud                      top right: cloud shadow
#   bottom left: cloud and cloud shadow  bottom right: neither
_CLOUD = 1 << 4
_SHADOW = 1 << 3
xx_bits = DataArray(
    [[_CLOUD, _SHADOW], [_CLOUD | _SHADOW, 0]],
    dims=("y", "x"),
)

# 2x2 enumerated-value fixture with some values set to
# 3 (shadow) and 9 (high confidence cloud).
xx_values = DataArray([[3, 9], [3, 0]], dims=("y", "x"))


# Test bits_to_bool
def test_bits_to_bool():
    """Exercise bits_to_bool with bit positions, raw bitflags and inversion."""
    dims = ("y", "x")
    any_flag_set = DataArray([[True, True], [True, False]], dims=dims)
    no_flag_set = DataArray([[False, False], [False, True]], dims=dims)
    bit4_clear = DataArray([[False, True], [False, True]], dims=dims)

    # Mask selected by bit positions
    by_bits = bits_to_bool(xx_bits, bits=[4, 3], bitflags=None)
    assert by_bits.equals(any_flag_set)

    # Same mask selected by a ready-made bitflag value
    by_flags = bits_to_bool(xx_bits, bits=None, bitflags=0b00011000)
    assert by_flags.equals(any_flag_set)

    # Inverted results
    by_bits_inverted = bits_to_bool(xx_bits, bits=[4, 3], bitflags=None, invert=True)
    assert by_bits_inverted.equals(no_flag_set)

    by_flags_inverted = bits_to_bool(
        xx_bits, bits=None, bitflags=0b00010000, invert=True
    )
    assert by_flags_inverted.equals(bit4_clear)


# Test enum_to_bool
def test_enum_to_bool():
    """Exercise enum_to_bool with and without inversion."""
    dims = ("y", "x")

    # Elements equal to 3 or 9 are selected
    selected = enum_to_bool(xx_values, values=[3, 9])
    assert selected.equals(DataArray([[True, True], [True, False]], dims=dims))

    # Inversion flips the selection
    rejected = enum_to_bool(xx_values, values=[3, 9], invert=True)
    assert rejected.equals(DataArray([[False, False], [False, True]], dims=dims))


# Test scale_and_offset
def test_scale_and_offset():
    """Exercise scale_and_offset with identity, missing and explicit values."""
    dims = ("y", "x")
    unchanged = DataArray([[3, 9], [3, 0]], dims=dims)

    # Identity scale and offset leave the values as they are
    result = scale_and_offset(xx_values, scale=1.0, offset=0.0)
    assert result.equals(unchanged)

    # Nothing supplied and nothing in attrs: tolerated via ignore_missing
    result = scale_and_offset(xx_values, scale=None, offset=None, ignore_missing=True)
    assert result.equals(unchanged)

    # Explicit values: x * 2 + 1
    result = scale_and_offset(xx_values, scale=2.0, offset=1.0)
    assert result.equals(DataArray([[7, 19], [7, 1]], dims=dims))

0 comments on commit 479dc32

Please sign in to comment.