Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add extended component #2373

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions glue/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .subset import Subset # noqa
from .subset_group import SubsetGroup # noqa
from .visual import VisualAttributes # noqa
from .regions import layer_to_subset # noqa

# We import this last to avoid circular imports
from .application_base import Application # noqa
100 changes: 99 additions & 1 deletion glue/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DASK_INSTALLED = False

__all__ = ['Component', 'DerivedComponent', 'CategoricalComponent',
'CoordinateComponent', 'DateTimeComponent']
'CoordinateComponent', 'DateTimeComponent', 'ExtendedComponent']


class Component(object):
Expand Down Expand Up @@ -107,6 +107,13 @@ def datetime(self):
"""
return False

@property
def extended(self):
    """
    Whether or not the datatype is extended.

    This implementation always returns False.
    """
    return False

def __str__(self):
    # Summary string: the concrete class name followed by this component's
    # shape as formatted by shape_to_string.
    return "%s with shape %s" % (self.__class__.__name__, shape_to_string(self.shape))

Expand Down Expand Up @@ -475,6 +482,97 @@ def to_series(self, **kwargs):
return pd.Series(self.labels, dtype=object, **kwargs)


class ExtendedComponent(Component):
    """
    Container for data that describes an extent, region, or range

    This component can be used when a dataset describes regions, perhaps
    with information about those regions, and it does not make sense to
    convert all those regions to glue subsets.

    Data loaders are required to know how to provide these extended regions
    as lists of Shapely objects and explicitly create these components.
    If a tabular dataset provides e.g. a range over another component or a
    circular region then an ExtendedComponent needs to be explicitly created
    from these definitions.

    A circular region can be represented as:

    >>> circle = shapely.Point(x, y).buffer(rad)

    This provides a polygon approximation of a circle, which can be made more
    exact by providing quad_segs to buffer (default is 16, provide >16 for more
    precision).

    A range in one dimension can be represented as:

    >>> interval = shapely.LineString([[x0, 0], [x1, 0]])

    This is a bit of an odd representation, since we are forced to specify a y
    coordinate for this line. We adopt a convention of y == 0 but this should
    not be required. For something like a very large number of ranges
    (e.g. genes) this might be a more expensive representation than we need
    or want.

    Data Viewers can choose to implement custom Layer Artists for Data objects
    with extended components. Since we need to create a LayerArtist when we add
    the data, this requires a Data Viewer to know whether it is displaying the
    ExtendedComponent when the Data is added.

    NOTE that this implementation does not support regions in more than
    two dimensions. (Shapely has partial support for 3D shapes, but not more).

    Geopandas plots Shapely objects in a somewhat optimized way
    https://github.com/geopandas/geopandas/blob/00e3748c987f5b9a14b5df5233421710811d75bf/geopandas/plotting.py#L323
    Matplotlib has some notes on performance increases:
    https://matplotlib.org/stable/users/explain/performance.html

    How to draw ellipses using Shapely
    https://stackoverflow.com/questions/13105915/draw-an-ellipse-using-shapely

    Parameters
    ----------
    data : iterable of :class:`~shapely.Geometry`
        The underlying array of Shapely geometries
    parent_component_ids : `list`, optional
        The IDs of the regular components over which the Shapely geometries
        extend: ``[x]`` for a one-dimensional range or ``[x, y]`` for a
        two-dimensional region. Missing entries are stored as `None`.
    units : `str`, optional
        Unit description.

    Raises
    ------
    TypeError
        If any element of ``data`` is not a :class:`~shapely.Geometry`.
    ValueError
        If more than two parent component IDs are given.
    """

    def __init__(self, data, parent_component_ids=None, units=None):
        import shapely

        # This is a linear scan over every element, so it is not free for
        # very large data sets.
        if not all(isinstance(x, shapely.Geometry) for x in data):
            raise TypeError(
                "Input data for an ExtendedComponent should be a list of shapely.Geometry objects"
            )

        # Default is None rather than [] so no list is shared between calls.
        parent_ids = [] if parent_component_ids is None else list(parent_component_ids)
        if len(parent_ids) > 2:
            raise ValueError(
                "ExtendedComponent supports at most 2 parent components, "
                "got {0}".format(len(parent_ids))
            )

        # Pad with None so both attributes always exist, including when no
        # parent IDs are given.
        parent_ids += [None] * (2 - len(parent_ids))
        self.parent_component_id_x, self.parent_component_id_y = parent_ids

        self.units = units
        self._data = data

    @property
    def extended(self):
        """Always True: this component holds extended regions."""
        return True

    @property
    def numeric(self):
        """Always False: geometries are not numeric values."""
        return False

    @property
    def datetime(self):
        """Always False: geometries are not date/time values."""
        return False

    @property
    def categorical(self):
        """Always False: geometries are not categorical values."""
        return False


class DateTimeComponent(Component):
"""
A component representing a date/time.
Expand Down
63 changes: 61 additions & 2 deletions glue/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# Note: leave all the following imports for component and component_id since
# they are here for backward-compatibility (the code used to live in this
# file)
from glue.core.component import Component, CoordinateComponent, DerivedComponent
from glue.core.component import Component, CoordinateComponent, DerivedComponent, ExtendedComponent
from glue.core.component_id import ComponentID, ComponentIDDict, PixelComponentID

try:
Expand Down Expand Up @@ -1429,6 +1429,8 @@ def get_kind(self, cid):
return 'numerical'
elif comp.categorical:
return 'categorical'
elif comp.extended:
return 'extended'
else:
raise TypeError("Unknown data kind")

Expand Down Expand Up @@ -1545,7 +1547,7 @@ def update_components(self, mapping):

# alert hub of the change
if self.hub is not None:
msg = NumericalDataChangedMessage(self)
msg = NumericalDataChangedMessage(self, components_changed=list(mapping.keys()))
self.hub.broadcast(msg)

for subset in self.subsets:
Expand Down Expand Up @@ -2053,6 +2055,63 @@ def visible_components(self):
if not isinstance(comp, CoordinateComponent) and cid.parent is self]


class RegionData(Data):
    """
    Data that describes a set of regions and (potentially) the properties of
    those regions.

    Components describing regions must be explicitly created as ExtendedComponents
    and passed into this object at initialization time. At most one
    ExtendedComponent is allowed per RegionData.

    Attributes set by this class:

    * ``ext_x`` / ``ext_y``: the ``parent_component_id_x`` /
      ``parent_component_id_y`` of the extended component, or `None` while no
      extended component has been added.
    """

    def __init__(self, label="", coords=None, **kwargs):
        # These attributes must exist before the parent constructor runs,
        # because add_component() below reads them.
        self._extended_component_ids = ComponentIDList()
        self.ext_x = None
        self.ext_y = None

        # __init__ calls add_component which deals with ExtendedComponent logic
        super().__init__(label=label, coords=coords, **kwargs)

    def check_extended_components(self):
        """
        Re-scan the components and record the single ExtendedComponent.

        The list of extended component IDs is rebuilt from scratch on every
        call, so calling this method repeatedly does not double-count.

        Raises
        ------
        ValueError
            If the data does not contain exactly one ExtendedComponent.
        """
        found = ComponentIDList()
        for compid, comp in self._components.items():
            if isinstance(comp, ExtendedComponent):
                found.append(compid)
                self.ext_x = comp.parent_component_id_x
                self.ext_y = comp.parent_component_id_y
        self._extended_component_ids = found

        num_ext = len(found)
        if num_ext > 1:
            raise ValueError("RegionData has {0} extended_components, but should only have 1".format(num_ext))
        elif num_ext < 1:
            raise ValueError("RegionData must be created with 1 extended_component")

    @contract(component='component_like', label='cid_like')
    def add_component(self, component, label):
        """ Add a new component to this data set, allowing only one ExtendedComponent

        Parameters
        ----------
        component : :class:`~glue.core.component.Component` or array-like
            Object to add.
        label : `str` or :class:`~glue.core.component_id.ComponentID`
              The label. If this is a string, a new
              :class:`glue.core.component_id.ComponentID`
              with this label will be created and associated with the Component.

        Returns
        -------
        Whatever the parent ``add_component`` returns (presumably the
        :class:`~glue.core.component_id.ComponentID` of the new component --
        confirm against :class:`Data`).

        Raises
        ------
        `ValueError`, if the RegionData already has an extended component
        """
        if not isinstance(component, ExtendedComponent):
            return super().add_component(component, label)

        if len(self._extended_component_ids) > 0:
            raise ValueError("RegionData can only contain a single ExtendedComponent")

        component_id = super().add_component(component, label)
        self.check_extended_components()
        return component_id


@contract(i=int, ndim=int)
def pixel_label(i, ndim):
label = "{0}".format(i)
Expand Down
6 changes: 6 additions & 0 deletions glue/core/layer_artist.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,12 @@ def _check_subset_state_changed(self):
self._changed = True
self._state = state

def update_component_limits(self, components_changed):
    """
    Update component limits for this layer

    This implementation does nothing; it is a hook that layer artists can
    override.

    Parameters
    ----------
    components_changed
        NOTE(review): presumably the ``components_changed`` value carried by
        ``NumericalDataChangedMessage`` -- confirm against the callers.
    """
    pass

def __str__(self):
    # Summary string: the concrete class name followed by the label of the
    # layer this object is attached to.
    return "%s for %s" % (self.__class__.__name__, self.layer.label)

Expand Down
5 changes: 4 additions & 1 deletion glue/core/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ def __init__(self, sender, attribute, tag=None):


class NumericalDataChangedMessage(DataMessage):
    """
    Message broadcast when the numerical values of a data object change.

    Parameters
    ----------
    sender
        The object sending the message (passed on to ``DataMessage``).
    components_changed : optional
        The components whose values changed. Defaults to `None`, meaning the
        sender did not specify which components changed; this keeps existing
        ``NumericalDataChangedMessage(sender)`` callers working.
    tag : optional
        Passed on to ``DataMessage``.
    """

    def __init__(self, sender, components_changed=None, tag=None):
        super(NumericalDataChangedMessage, self).__init__(sender, tag=tag)
        self.components_changed = components_changed


class DataCollectionMessage(Message):
Expand Down
41 changes: 41 additions & 0 deletions glue/core/regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Functions to support data that defines regions
"""
import numpy as np

from glue.core.roi import PolygonalROI
from glue.core.data import RegionData

from glue.config import layer_action
from glue.core.subset import RoiSubsetState, MultiOrState


def reg_to_roi(reg):
    """
    Convert a Shapely polygon into a :class:`~glue.core.roi.PolygonalROI`.

    Only the exterior ring of the polygon is used; interior rings (holes)
    are currently ignored.

    Parameters
    ----------
    reg
        A Shapely geometry. Only geometries whose ``geom_type`` is
        ``"Polygon"`` are supported.

    Returns
    -------
    :class:`~glue.core.roi.PolygonalROI`
        ROI whose vertices are the exterior coordinates of ``reg``.

    Raises
    ------
    NotImplementedError
        If ``reg`` is not a ``Polygon`` (e.g. a ``MultiPolygon`` or ``Point``).
    """
    if reg.geom_type != "Polygon":
        # Fail loudly instead of falling through with no ROI defined.
        raise NotImplementedError(
            "Cannot convert geometry of type {0!r} to an ROI; "
            "only 'Polygon' is supported".format(reg.geom_type)
        )

    # TODO: account for interior rings
    vx, vy = np.array(reg.exterior.coords.xy)
    return PolygonalROI(vx=vx, vy=vy)


@layer_action(label='Convert regions to subset', single=True, subset=True)
def layer_to_subset(layer, data_collection):
    """
    Create a new subset group from the regions stored in a layer.

    Each region in the layer's extended component is converted to an ROI
    subset state over the ``ext_x`` / ``ext_y`` components of the layer's
    data. Multiple regions are combined with a ``MultiOrState``.

    This only applies when ``layer.data`` is a RegionData with an extended
    component; for any other layer a warning is issued and nothing is
    created. Nothing is created either when the layer contains no regions.

    Parameters
    ----------
    layer
        The selected layer; its ``data`` attribute is inspected.
    data_collection
        The data collection on which the new subset group is created.
    """
    import warnings

    region_data = layer.data
    if not isinstance(region_data, RegionData) or not region_data._extended_component_ids:
        warnings.warn("'Convert regions to subset' only applies to RegionData "
                      "with an extended component; nothing was created")
        return

    extended_comp = region_data._extended_component_ids[0]
    regions = layer[extended_comp]

    roisubstates = [RoiSubsetState(region_data.ext_x,
                                   region_data.ext_y,
                                   roi=reg_to_roi(region))
                    for region in regions]

    # With no regions there is no subset state to build.
    if not roisubstates:
        return

    if len(roisubstates) > 1:
        composite_substate = MultiOrState(roisubstates)
    else:
        composite_substate = roisubstates[0]

    data_collection.new_subset_group(subset_state=composite_substate)
2 changes: 1 addition & 1 deletion glue/core/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,7 +1197,7 @@ def _save_session(session, context):
@loader(np.ndarray)
def _load_numpy(rec, context):
    # rec['data'] holds a base64-encoded payload; decode it into an in-memory
    # buffer that np.load can read.
    s = BytesIO(b64decode(rec['data']))
    # NOTE(review): this is a scraped diff, so the removed line (below) and
    # its replacement (last line) both appear; only one should remain.
    return np.load(s)
    # SECURITY: allow_pickle=True makes np.load unpickle object arrays, and
    # unpickling can execute arbitrary code. Only load data from trusted
    # sources with this setting.
    return np.load(s, allow_pickle=True)


@saver(np.ndarray)
Expand Down
49 changes: 48 additions & 1 deletion glue/core/tests/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
from unittest.mock import MagicMock

from astropy.wcs import WCS
from shapely.geometry import MultiPolygon, Polygon, Point

from glue import core
from glue.tests.helpers import requires_astropy

from ..coordinates import Coordinates
from ..component import (Component, DerivedComponent, CoordinateComponent,
CategoricalComponent)
CategoricalComponent, ExtendedComponent)
from ..component_id import ComponentID
from ..data import Data
from ..parse import ParsedCommand, ParsedComponentLink
Expand Down Expand Up @@ -403,3 +404,49 @@ def test_coordinate_component_1d_coord():

data = Data(flux=np.random.random(5), coords=wcs, label='data')
np.testing.assert_equal(data['Frequency'], [1, 2, 3, 4, 5])


class TestExtendedComponent(object):
    """Tests for ExtendedComponent built from polygon and circle geometries."""

    def setup_method(self, method):
        poly_1 = Polygon([(20, 20), (60, 20), (60, 40), (20, 40)])
        poly_2 = Polygon([(60, 50), (60, 70), (80, 70), (80, 50)])
        poly_3 = Polygon([(10, 10), (15, 10), (15, 15), (10, 15)])
        poly_4 = Polygon([(10, 20), (15, 20), (15, 30), (10, 30), (12, 25)])

        # Mix simple polygons with a multi-part geometry.
        polygons = MultiPolygon([poly_3, poly_4])
        polys = np.array([poly_1, poly_2, polygons])

        # Keep the IDs on the instance so tests can compare by identity.
        self.cen_x_id = ComponentID('x')
        self.cen_y_id = ComponentID('y')

        self.reg_comp_2d_poly = ExtendedComponent(
            polys, parent_component_ids=[self.cen_x_id, self.cen_y_id])

        circle_1 = Point(1.0, 0.0).buffer(1)
        circle_2 = Point(2.0, 3.0).buffer(2)
        circles = np.array([circle_1, circle_2])

        # Only one parent component: exercises the single-ID code path.
        self.reg_comp_circles = ExtendedComponent(
            circles, parent_component_ids=[self.cen_x_id])

    def test_kind_flags(self):
        for comp in (self.reg_comp_2d_poly, self.reg_comp_circles):
            assert comp.extended
            assert not comp.numeric
            assert not comp.categorical
            assert not comp.datetime

    def test_regular_component_is_not_extended(self):
        assert not Component(np.array([1, 2, 3])).extended

    def test_parent_component_ids(self):
        # Compare with `is`, not ==, so the check does not depend on how
        # ComponentID implements equality.
        assert self.reg_comp_2d_poly.parent_component_id_x is self.cen_x_id
        assert self.reg_comp_2d_poly.parent_component_id_y is self.cen_y_id

        assert self.reg_comp_circles.parent_component_id_x is self.cen_x_id
        assert self.reg_comp_circles.parent_component_id_y is None

    def test_rejects_non_geometry_input(self):
        import pytest

        with pytest.raises(TypeError):
            ExtendedComponent([1, 2, 3], parent_component_ids=[self.cen_x_id])
Loading