Skip to content

Commit

Permalink
Add viewport downsample algorithm (#6017)
Browse files Browse the repository at this point in the history
Co-authored-by: Philipp Rudiger <[email protected]>
  • Loading branch information
hoxbro and philippjfr authored Jan 30, 2024
1 parent 231cd71 commit c800e52
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 10 deletions.
12 changes: 12 additions & 0 deletions holoviews/core/data/cudf.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,18 @@ def select_mask(cls, dataset, selection):
mask &= new_mask
return mask

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
    """Run ``select_mask`` and expand the True values to include their neighbors.

    Example::

        select_mask          = [False, False, True, True, False, False]
        select_mask_neighbor = [False, True, True, True, True, False]

    Args:
        dataset: The dataset the selection would be applied to (unused here).
        selection: The selection specification (unused here).

    Raises:
        NotImplementedError: Always; this interface does not implement
            neighbor expansion.
    """
    # Name the interface in the message so a direct caller can tell which
    # backend lacks support; the exception type is unchanged.
    raise NotImplementedError(
        f"{cls.__name__} does not implement neighbor expansion of selection masks"
    )

@classmethod
def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
Expand Down
20 changes: 20 additions & 0 deletions holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ def select_mask(cls, dataset, selection):
def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
if selection_mask is not None:
import dask.array as da
if isinstance(selection_mask, da.Array):
return df.loc[selection_mask]
return df[selection_mask]
selection_mask = cls.select_mask(dataset, selection)
indexed = cls.indexed(dataset, selection)
Expand All @@ -162,6 +165,23 @@ def select(cls, dataset, selection_mask=None, **selection):
return df[dataset.vdims[0].name].compute().iloc[0]
return df

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
    """Run ``select_mask`` and expand the True values to include their neighbors.

    Example::

        select_mask          = [False, False, True, True, False, False]
        select_mask_neighbor = [False, True, True, True, True, False]

    Args:
        dataset: The dataset passed through to ``select_mask``.
        selection: The selection passed through to ``select_mask``.

    Returns:
        The array obtained from ``to_dask_array().compute_chunk_sizes()``
        with every entry adjacent to a True entry also set to True.
    """
    selected = cls.select_mask(dataset, selection)
    # NOTE(review): chunk sizes are presumably unknown right after
    # to_dask_array(); compute_chunk_sizes() appears to be required so the
    # slicing below works -- confirm against the dask documentation.
    expanded = selected.to_dask_array().compute_chunk_sizes()
    # True wherever two adjacent entries differ, i.e. at the edges of each
    # run of selected values. Computed before either update below.
    edges = expanded[1:] ^ expanded[:-1]
    # Mark the entry after each edge, then the entry before each edge.
    expanded[1:] |= edges
    expanded[:-1] |= edges
    return expanded

@classmethod
def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
index_dims = [dataset.get_dimension(d) for d in dimensions]
Expand Down
12 changes: 12 additions & 0 deletions holoviews/core/data/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,18 @@ def select_mask(cls, dataset, selection):
predicates.append(column == object)
return predicates

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
    """Run ``select_mask`` and expand the True values to include their neighbors.

    Example::

        select_mask          = [False, False, True, True, False, False]
        select_mask_neighbor = [False, True, True, True, True, False]

    Args:
        dataset: The dataset the selection would be applied to (unused here).
        selection: The selection specification (unused here).

    Raises:
        NotImplementedError: Always; this interface does not implement
            neighbor expansion.
    """
    # Name the interface in the message so a direct caller can tell which
    # backend lacks support; the exception type is unchanged.
    raise NotImplementedError(
        f"{cls.__name__} does not implement neighbor expansion of selection masks"
    )

@classmethod
def sample(cls, dataset, samples=None):
import ibis
Expand Down
15 changes: 15 additions & 0 deletions holoviews/core/data/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,21 @@ def select_mask(cls, dataset, selection):
mask &= index_mask
return mask

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
    """Run ``select_mask`` and expand the True values to include their neighbors.

    Example::

        select_mask          = [False, False, True, True, False, False]
        select_mask_neighbor = [False, True, True, True, True, False]

    Args:
        dataset: The dataset passed through to ``select_mask``.
        selection: The selection passed through to ``select_mask``.

    Returns:
        The mask returned by ``select_mask``, updated in place so that
        every entry adjacent to a True entry is also True.
    """
    expanded = cls.select_mask(dataset, selection)
    # True wherever two adjacent entries differ, i.e. at the edges of each
    # run of selected values. Computed before either update below.
    edges = expanded[1:] ^ expanded[:-1]
    # Mark the entry after each edge, then the entry before each edge.
    expanded[1:] |= edges
    expanded[:-1] |= edges
    return expanded

@classmethod
def indexed(cls, dataset, selection):
Expand Down
42 changes: 37 additions & 5 deletions holoviews/operation/downsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,13 @@ def _nth_point(x, y, n_out, **kwargs):
y (np.ndarray): The y-values of the data.
n_out (int): The number of output points.
Returns:
np.array: The indexes of the selected datapoints.
slice: The slice of selected datapoints.
"""
n_samples = len(x)
return np.arange(0, n_samples, max(1, math.ceil(n_samples / n_out)))
return slice(0, n_samples, max(1, math.ceil(n_samples / n_out)))

def _viewport(x, y, n_out, **kwargs):
    """Select every datapoint; no downsampling is performed.

    Args:
        x (np.ndarray): The x-values of the data.
        y (np.ndarray): The y-values of the data (unused).
        n_out (int): The number of output points (unused).

    Returns:
        slice: A slice covering all ``len(x)`` datapoints.
    """
    n_samples = len(x)
    return slice(n_samples)

def _min_max(x, y, n_out, **kwargs):
try:
Expand Down Expand Up @@ -191,6 +194,7 @@ def _m4(x, y, n_out, **kwargs):
_ALGORITHMS = {
'lttb': _lttb,
'nth': _nth_point,
'viewport': _viewport,
'minmax': _min_max,
'minmax-lttb': _min_max_lttb,
'm4': _m4,
Expand All @@ -207,13 +211,14 @@ class downsample1d(ResampleOperation1D):
algorithm = param.Selector(default='lttb', objects=list(_ALGORITHMS), doc="""
The algorithm to use for downsampling:
- `lttb`: Largest Triangle Three Buckets downsample algorithm
- `lttb`: Largest Triangle Three Buckets downsample algorithm.
- `nth`: Selects every n-th point.
- `viewport`: Selects all points in a given viewport.
- `minmax`: Selects the min and max value in each bin (requires tsdownsampler).
- `m4`: Selects the min, max, first and last value in each bin (requires tsdownsampler).
- `minmax-lttb`: First selects n_out * minmax_ratio min and max values,
then further reduces these to n_out values using the
Largest Triangle Three Buckets algorithm. (requires tsdownsampler)""")
Largest Triangle Three Buckets algorithm (requires tsdownsampler).""")

parallel = param.Boolean(default=False, doc="""
The number of threads to use (if tsdownsampler is available).""")
Expand All @@ -223,6 +228,10 @@ class downsample1d(ResampleOperation1D):
values to generate with the minmax algorithm before further
downsampling with LTTB.""")

neighbor_points = param.Boolean(default=None, doc="""
Whether to add the neighbor points to the range before downsampling.
By default this is only enabled for the viewport algorithm.""")

def _process(self, element, key=None):
if isinstance(element, (Overlay, NdOverlay)):
_process = partial(self._process, key=key)
Expand All @@ -233,7 +242,8 @@ def _process(self, element, key=None):
return element.clone(elements)

if self.p.x_range:
element = element[slice(*self.p.x_range)]
mask = self._compute_mask(element)
element = element[mask]
if len(element) <= self.p.width:
return element
xs, ys = (element.dimension_values(i) for i in range(2))
Expand All @@ -249,3 +259,25 @@ def _process(self, element, key=None):
kwargs['minmax_ratio'] = self.p.minmax_ratio
samples = downsample(xs, ys, self.p.width, parallel=self.p.parallel, **kwargs)
return element.iloc[samples]

def _compute_mask(self, element):
    """Compute the mask to apply to the element before downsampling.

    When neighbor points are enabled, the interface's
    ``_select_mask_neighbor`` is asked for a mask over ``x_range`` on the
    first key dimension. Otherwise, or when that call fails, a plain
    ``slice(*x_range)`` is returned.

    Args:
        element: The element about to be downsampled.

    Returns:
        The neighbor-expanded mask from the interface, or
        ``slice(*self.p.x_range)`` as the fallback.
    """
    use_neighbors = self.p.neighbor_points
    if use_neighbors is None:
        # Unset: neighbor points are on only for the viewport algorithm.
        use_neighbors = self.p.algorithm == "viewport"

    if use_neighbors:
        try:
            return element.dataset.interface._select_mask_neighbor(
                element.dataset, {element.kdims[0]: self.p.x_range}
            )
        except NotImplementedError:
            # The interface does not support neighbor masks; fall back
            # silently to the plain slice.
            pass
        except Exception as e:
            # Best effort: report the failure and fall back to the slice.
            self.param.warning(f"Could not apply neighbor mask to downsample1d: {e}")

    return slice(*self.p.x_range)
7 changes: 7 additions & 0 deletions holoviews/plotting/bokeh/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def set_callback(self, handle):
if self.on_changes:
change_handler = lambda attr, old, new: (
asyncio.create_task(self.on_change(attr, old, new))
if self.plot.document else None
)
for change in self.on_changes:
if change in ['patching', 'streaming']:
Expand Down Expand Up @@ -645,6 +646,12 @@ class RangeXYCallback(Callback):
'y1': 'cb_obj.y1',
}

def initialize(self, plot_id=None):
    """Initialize the callback, then update every attached stream once.

    Args:
        plot_id: Forwarded unchanged to the parent ``initialize``.
    """
    super().initialize(plot_id)
    # Each stream gets its own message built from an empty payload.
    # NOTE(review): presumably this seeds the streams with the plot's
    # current ranges before any client event arrives -- confirm.
    for linked in self.streams:
        linked.update(**self._process_msg({}))

def _process_msg(self, msg):
if self.plot.state.x_range is not self.plot.handles['x_range']:
x_range = self.plot.handles['x_range']
Expand Down
3 changes: 0 additions & 3 deletions holoviews/plotting/bokeh/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,6 @@ def _update_datasource(self, source, data):
"""
Update datasource with data for a new frame.
"""
if not self.document:
return

data = self._postprocess_data(data)
empty = all(len(v) == 0 for v in data.values())
if (self.streaming and self.streaming[0].data is self.current_frame.data
Expand Down
8 changes: 8 additions & 0 deletions holoviews/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,6 +1480,10 @@ class RangeX(LinkedStream):
x_range = param.Tuple(default=None, length=2, constant=True, doc="""
Range of the x-axis of a plot in data coordinates""")

def _set_stream_parameters(self, **kwargs):
    """Set the stream parameters, ignoring any ``y_range`` entry.

    This stream only declares ``x_range``, so ``y_range`` is discarded
    before the remaining values are handed to the parent implementation.
    """
    if "y_range" in kwargs:
        del kwargs["y_range"]
    super()._set_stream_parameters(**kwargs)


class RangeY(LinkedStream):
"""
Expand All @@ -1489,6 +1493,10 @@ class RangeY(LinkedStream):
y_range = param.Tuple(default=None, length=2, constant=True, doc="""
Range of the y-axis of a plot in data coordinates""")

def _set_stream_parameters(self, **kwargs):
    """Set the stream parameters, ignoring any ``x_range`` entry.

    This stream only declares ``y_range``, so ``x_range`` is discarded
    before the remaining values are handed to the parent implementation.
    """
    if "x_range" in kwargs:
        del kwargs["x_range"]
    super()._set_stream_parameters(**kwargs)


class BoundsXY(LinkedStream):
"""
Expand Down
6 changes: 6 additions & 0 deletions holoviews/tests/core/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,12 @@ def test_dataset_transform_add_ht(self):
kdims=self.kdims, vdims=self.vdims+['combined'])
self.assertEqual(transformed, expected)

def test_select_with_neighbor(self):
    # The plain mask marks only the matching row; the neighbor mask must
    # also mark the rows directly before and after it.
    select = self.table.interface.select_mask(self.table.dataset, {"Weight": 18})
    select_neighbor = self.table.interface._select_mask_neighbor(self.table.dataset, dict(Weight=18))

    np.testing.assert_almost_equal(select, [False, True, False])
    np.testing.assert_almost_equal(select_neighbor, [True, True, True])


class ScalarColumnTests:
Expand Down
7 changes: 7 additions & 0 deletions holoviews/tests/core/data/test_cudfinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,10 @@ def test_dataset_groupby_second_dim(self):

def test_dataset_aggregate_string_types_size(self):
raise SkipTest("cuDF does not support variance aggregation")

def test_select_with_neighbor(self):
    # Run the shared test and convert NotImplementedError into a skip.
    try:
        # Not currently supported by CuDF
        super().test_select_with_neighbor()
    except NotImplementedError:
        raise SkipTest("Not supported")
7 changes: 7 additions & 0 deletions holoviews/tests/core/data/test_ibisinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,13 @@ def test_aggregation_operations(self):

self.compare_dataset(expected, result, msg=str(agg))

def test_select_with_neighbor(self):
    # Run the shared test and convert NotImplementedError into a skip.
    try:
        # Not currently supported by Ibis
        super().test_select_with_neighbor()
    except NotImplementedError:
        raise SkipTest("Not supported")

if not IbisInterface.has_rowid():

def test_dataset_iloc_slice_rows_slice_cols(self):
Expand Down
10 changes: 8 additions & 2 deletions holoviews/tests/operation/test_downsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
except ImportError:
tsdownsample = None

# Exclude 'viewport' from the size checks below: it returns
# slice(len(data)) no matter the requested width.
algorithms = _ALGORITHMS.copy()
algorithms.pop('viewport', None)

@pytest.mark.parametrize("plottype", ["overlay", "ndoverlay"])
def test_downsample1d_multi(plottype):
Expand All @@ -26,7 +28,7 @@ def test_downsample1d_multi(plottype):
assert value.size == downsample1d.width


@pytest.mark.parametrize("algorithm", _ALGORITHMS.values(), ids=_ALGORITHMS)
@pytest.mark.parametrize("algorithm", algorithms.values(), ids=algorithms)
def test_downsample_algorithm(algorithm, unimport):
unimport("tsdownsample")
x = np.arange(1000)
Expand All @@ -37,14 +39,18 @@ def test_downsample_algorithm(algorithm, unimport):
except NotImplementedError:
pytest.skip("not testing tsdownsample algorithms")
else:
if isinstance(result, slice):
result = x[result]
assert result.size == width


@pytest.mark.skipif(not tsdownsample, reason="tsdownsample not installed")
@pytest.mark.parametrize("algorithm", _ALGORITHMS.values(), ids=_ALGORITHMS)
@pytest.mark.parametrize("algorithm", algorithms.values(), ids=algorithms)
def test_downsample_algorithm_with_tsdownsample(algorithm):
x = np.arange(1000)
y = np.random.rand(1000)
width = 20
result = algorithm(x, y, width)
if isinstance(result, slice):
result = x[result]
assert result.size == width

0 comments on commit c800e52

Please sign in to comment.