Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Tiered annotations #81

Open
wants to merge 37 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
66289d4
Type annotations for annotation.py and part of timeline.py
hadware Dec 9, 2019
ed1d2d6
Type hinting for timelines is mostly done.
hadware Dec 9, 2019
aa96d5b
Added a "typedef" file for commonly used types in the library. Most t…
hadware Dec 10, 2019
e5163fa
Merge branch 'develop' of github.com:pyannote/pyannote-core into develop
hadware Dec 11, 2019
eae045e
Fixed tests to accommodate for the namedtuples -> dataclass conversion…
hadware Dec 11, 2019
fbbd935
Merge branch 'develop' of github.com:pyannote/pyannote-core into develop
hadware Dec 11, 2019
d55e232
fixing dataclasses install on 3.7.
hadware Dec 11, 2019
4497d40
Added hints for notebook.py and a couple of helpers. Fixed some incor…
hadware Dec 11, 2019
d19be1a
Added typing_extension dependency, correct annotation.py as per comme…
hadware Dec 12, 2019
6042969
Corrected typing for segment.py and feature.py. Deprecated non-PEP8 m…
hadware Dec 12, 2019
ac552b4
Corrected timeline.py and notebook.py type hinting (and some other on…
hadware Dec 12, 2019
7a94a48
Switched Generators to Iterators types hints. Fixed the Flake8 test f…
hadware Dec 12, 2019
acb581c
Changed `SegmentCropMode` to `Alignment`
hadware Dec 13, 2019
4444ddc
For some reason the timeline cropping functions slipped through the c…
hadware Dec 13, 2019
234d00f
Tweaking the TL cropping type hinting.
hadware Dec 13, 2019
bbac3a3
Bugfix on the type hint.
hadware Dec 13, 2019
e436aa5
chore: use relative import for Alignment
hbredin Dec 13, 2019
39ee094
Merge branch 'develop' of github.com:pyannote/pyannote-core into develop
hadware Dec 18, 2019
79b188b
Merge branch 'develop' of github.com:pyannote/pyannote-core into develop
hadware Aug 29, 2020
ee9310d
Merge remote-tracking branch 'origin/develop' into develop
hadware Aug 29, 2020
f73836a
Started implementing the textgrid classes from an annotation and time…
hadware Sep 29, 2020
6f23520
Merge branch 'develop' of github.com:pyannote/pyannote-core into tier…
hadware Feb 3, 2021
0b5092d
pre-checkout commit
hadware Jun 23, 2021
e9aa581
Merge remote-tracking branch 'main/develop' into tiered_annotations
hadware Aug 24, 2022
40a190a
Changed class names, added textgrid-parser dependency
hadware Aug 24, 2022
b604637
Switching to a timeline-based tier system
hadware Aug 25, 2022
c458895
Added abstract class for all segmentation classes
hadware Sep 6, 2022
8eadcfd
Attempts at creating abstract classes for all annotations
hadware Jan 30, 2023
226ec8f
Set up the base classes, their abstract methods and the inheritance h…
hadware Feb 10, 2023
0cea04a
Starting implem of base functions and setting up classes
hadware Feb 14, 2023
d6f8c7c
Some work on the implementation of methods for tiers.
hadware Feb 22, 2023
08a03e4
Attempts at creating abstract classes for all annotations
hadware Mar 2, 2023
f94d651
Post discussion commit
hadware Mar 26, 2023
ede6d84
Woops
hadware Mar 26, 2023
8978b11
Implemented methods for tiered objects.
hadware Apr 6, 2023
e166f3f
Crop method for partitions.
hadware Apr 25, 2023
fc3baf3
Implemented crop for all tier types. Restricted regular tiers to non-…
hadware Apr 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions pyannote/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
PYANNOTE_TRACK,
PYANNOTE_LABEL,
)
from .base import BaseSegmentation, GappedAnnotationMixin
from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT
from .segment import Segment, SlidingWindow
from .timeline import Timeline
Expand All @@ -145,7 +146,7 @@
import pandas as pd


class Annotation:
class Annotation(GappedAnnotationMixin, BaseSegmentation):
"""Annotation

Parameters
Expand Down Expand Up @@ -175,7 +176,7 @@ def from_df(

def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None):

self._uri: Optional[str] = uri
super().__init__(uri)
self.modality: Optional[str] = modality

# sorted dictionary
Expand Down Expand Up @@ -207,7 +208,7 @@ def uri(self, uri: str):
timeline.uri = uri
self._uri = uri

def _updateLabels(self):
def _update_labels(self):

# list of labels that needs to be updated
update = set(
Expand Down Expand Up @@ -293,7 +294,7 @@ def itertracks(
else:
yield segment, track

def _updateTimeline(self):
def _update_timeline(self):
self._timeline = Timeline(segments=self._tracks, uri=self.uri)
self._timelineNeedsUpdate = False

Expand All @@ -319,7 +320,7 @@ def get_timeline(self, copy: bool = True) -> Timeline:

"""
if self._timelineNeedsUpdate:
self._updateTimeline()
self._update_timeline()
if copy:
return self._timeline.copy()
return self._timeline
Expand All @@ -332,18 +333,18 @@ def __eq__(self, other: "Annotation"):
Two annotations are equal if and only if their tracks and associated
labels are equal.
"""
pairOfTracks = itertools.zip_longest(
pair_of_tracks = itertools.zip_longest(
self.itertracks(yield_label=True), other.itertracks(yield_label=True)
)
return all(t1 == t2 for t1, t2 in pairOfTracks)
return all(t1 == t2 for t1, t2 in pair_of_tracks)

def __ne__(self, other: "Annotation"):
"""Inequality"""
pairOfTracks = itertools.zip_longest(
pair_of_tracks = itertools.zip_longest(
self.itertracks(yield_label=True), other.itertracks(yield_label=True)
)

return any(t1 != t2 for t1, t2 in pairOfTracks)
return any(t1 != t2 for t1, t2 in pair_of_tracks)

def __contains__(self, included: Union[Segment, Timeline]):
"""Inclusion
Expand Down Expand Up @@ -559,6 +560,7 @@ def crop(self, support: Support, mode: CropMode = "intersection") -> "Annotation
else:
raise NotImplementedError("unsupported mode: '%s'" % mode)

# TODO: remove (already in base class)
def extrude(
self, removed: Support, mode: CropMode = "intersection"
) -> "Annotation":
Expand Down Expand Up @@ -634,7 +636,7 @@ def get_overlap(self, labels: Optional[Iterable[Label]] = None) -> "Timeline":
annotation.get_overlap()
|------| |-----| |--------|

annotation.get_overlap(for_labels=["A", "B"])
annotation.get_overlap(labels=["A", "B"])
|--| |--| |----|

Parameters
Expand Down Expand Up @@ -913,7 +915,7 @@ def labels(self) -> List[Label]:
Sorted list of labels
"""
if any([lnu for lnu in self._labelNeedsUpdate.values()]):
self._updateLabels()
self._update_labels()
return sorted(self._labels, key=str)

def get_labels(
Expand Down Expand Up @@ -1060,7 +1062,7 @@ def label_timeline(self, label: Label, copy: bool = True) -> Timeline:
return Timeline(uri=self.uri)

if self._labelNeedsUpdate[label]:
self._updateLabels()
self._update_labels()

if copy:
return self._labels[label].copy()
Expand Down
282 changes: 282 additions & 0 deletions pyannote/core/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
from abc import ABCMeta, abstractmethod
from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List, Set, Iterable

from sortedcontainers import SortedList
from typing_extensions import Self

from pyannote.core import Segment
from pyannote.core.utils.types import Support, CropMode, ContiguousSupport, Label

if TYPE_CHECKING:
from .timeline import Timeline


class BaseSegmentation(metaclass=ABCMeta):
    """Abstract base class for all segmented annotations.

    Concrete subclasses must provide segment iteration (`itersegments`),
    sizing, truthiness, (in)equality, `update`, `empty`, `copy`, `extent`,
    `duration` and the string / PNG representations. On top of those, this
    class supplies default implementations of `segments_set`,
    `get_timeline`, `co_iter` (also exposed through the `@` operator) and
    `__contains__`.

    Parameters
    ----------
    uri : str, optional
        Path to (or any identifier of) the segmented resource.
    """

    def __init__(self, uri: Optional[str] = None):
        # path to (or any identifier of) segmented resource
        self._uri: Optional[str] = uri

    @property
    def uri(self):
        """Identifier of the segmented resource (may be None)."""
        return self._uri

    @uri.setter
    def uri(self, uri: str):
        self._uri = uri

    @abstractmethod
    def __len__(self) -> int:
        pass

    def __nonzero__(self):
        # Python 2 spelling of `__bool__`, kept as a plain alias.
        return self.__bool__()

    @abstractmethod
    def __bool__(self):
        """Truthiness of the segmentation. Truthy means that it contains something
        False means it's empty."""
        pass

    @abstractmethod
    def __eq__(self, other: Self):
        pass

    @abstractmethod
    def __ne__(self, other: Self):
        pass

    def __matmul__(self, other: Union['BaseSegmentation', Segment]):
        """`self @ other` is shorthand for `self.co_iter(other)`."""
        return self.co_iter(other)

    @abstractmethod
    def itersegments(self):
        pass

    def segments_set(self) -> Set[Segment]:
        """Return the set of segments yielded by `itersegments`."""
        # default implementation, may be overridden for better performance
        return set(self.itersegments())

    def get_timeline(self) -> 'Timeline':
        """Build a `Timeline` from this segmentation's segments.

        The resulting timeline carries the same `uri` as this segmentation.
        """
        # Imported here rather than at module level: `Timeline` is only
        # imported under TYPE_CHECKING at the top of the file.
        from .timeline import Timeline
        return Timeline(self.itersegments(), uri=self.uri)

    @abstractmethod
    def update(self, other: Self) -> Self:
        pass

    def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments.

        Parameters
        ----------
        other : BaseSegmentation or Segment
            Segmentation (or single segment) to intersect with.

        Yields
        ------
        segment, other_segment : (Segment, Segment)
            Every pair, with `segment` taken from `self` and `other_segment`
            taken from `other`, for which
            `segment.intersects(other_segment)` is true.
        """
        if isinstance(other, Segment):
            other_segments = SortedList([other])
        else:
            other_segments = SortedList(other.itersegments())

        # TODO maybe wrap self.itersegs in a sortedlist as well?
        for segment in self.itersegments():

            # iterate over segments that starts before 'segment' ends
            temp = Segment(start=segment.end, end=segment.end)
            for other_segment in other_segments.irange(maximum=temp):
                if segment.intersects(other_segment):
                    yield segment, other_segment

    @abstractmethod
    def __str__(self):
        pass

    @abstractmethod
    def __repr__(self):
        pass

    def __contains__(self, included: Union[Segment, 'BaseSegmentation']) -> bool:
        """Inclusion test.

        Parameters
        ----------
        included : Segment or BaseSegmentation
            A segment is contained if it is one of this segmentation's
            segments; a segmentation is contained if all of its segments are.

        Raises
        ------
        ValueError
            If `included` is neither a `Segment` nor a `BaseSegmentation`.
        """
        # Base implementation, may be overloaded for better performance
        seg_set = self.segments_set()
        if isinstance(included, Segment):
            return included in seg_set
        elif isinstance(included, BaseSegmentation):
            return seg_set.issuperset(included.segments_set())
        else:
            raise ValueError(
                "Containment is only implemented for Segment and "
                "BaseSegmentation instances, got "
                f"{type(included).__name__}."
            )

    @abstractmethod
    def empty(self) -> Self:
        pass

    @abstractmethod
    def copy(self) -> Self:
        pass

    @abstractmethod
    def extent(self) -> Segment:
        pass

    @abstractmethod
    def duration(self) -> float:
        pass

    @abstractmethod
    def _repr_png_(self):
        pass


# TODO: rename to SegmentSet?
class GappedAnnotationMixin(BaseSegmentation):
    """Interface for segmentations exposing gap, crop, support and overlap
    operations, plus a default `extrude` built on top of `crop`."""

    @abstractmethod
    def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]:
        pass

    @abstractmethod
    def gaps(self, support: Optional[Support] = None) -> 'Timeline':
        pass

    def extrude(self,
                removed: Support,
                mode: CropMode = 'intersection') -> Self:
        """Remove segments that overlap `removed` support.

        Parameters
        ----------
        removed : Segment or Timeline
            If `removed` is a `Timeline`, its support is used.
        mode : {'strict', 'loose', 'intersection'}, optional
            Controls how segments that are not fully included in `removed` are
            handled. 'strict' mode only removes fully included segments. 'loose'
            mode removes any intersecting segment. 'intersection' mode removes
            the overlapping part of any intersecting segment.

        Returns
        -------
        extruded : same type as `self`
            Whatever `self.crop` returns for the complementary support.

        Examples
        --------

        >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 5)])
        >>> timeline.extrude(Segment(1, 2))
        <Timeline(uri=None, segments=[<Segment(0, 1)>, <Segment(3, 5)>])>

        >>> timeline.extrude(Segment(1, 3), mode='loose')
        <Timeline(uri=None, segments=[<Segment(3, 5)>])>

        >>> timeline.extrude(Segment(1, 3), mode='strict')
        <Timeline(uri=None, segments=[<Segment(0, 2)>, <Segment(3, 5)>])>

        """
        # `Timeline` is only imported under TYPE_CHECKING at module level, so
        # it has to be imported here to exist at runtime.
        from .timeline import Timeline

        if isinstance(removed, Segment):
            removed = Timeline([removed])
        else:
            removed = removed.get_timeline()

        # Extruding `removed` is cropping to what is left of our own extent
        # once `removed` has been taken out of it.
        extent_tl = Timeline([self.extent()], uri=self.uri)
        truncating_support = removed.gaps(support=extent_tl)
        # loose for extrude means strict for crop and vice-versa
        if mode == "loose":
            mode = "strict"
        elif mode == "strict":
            mode = "loose"
        return self.crop(truncating_support, mode=mode)

    @abstractmethod
    def crop(self,
             support: Support,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]:
        pass

    @abstractmethod
    def support(self, collar: float = 0.) -> Self:
        pass

    @abstractmethod
    def get_overlap(self) -> 'Timeline':
        pass


class ContiguousAnnotationMixin(BaseSegmentation):
    """Interface for contiguous segmentations.

    Provides a faster `co_iter` when the other operand is also a
    `ContiguousAnnotationMixin` (or a single `Segment`), and requires
    subclasses to implement `crop` and `bisect`.
    """
    # TODO : figure out if the return mapping still makes sense
    #  (probably not)

    def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments.

        Parameters
        ----------
        other : BaseSegmentation or Segment
            Segmentation (or single segment) to intersect with.

        Yields
        ------
        seg_a, seg_b : (Segment, Segment)
            Pairs with `seg_a` from `self` and `seg_b` from `other` for which
            `seg_a.intersects(seg_b)` is true.
        """
        if not isinstance(other, (ContiguousAnnotationMixin, Segment)):
            # This function is a generator: a bare `return <iterator>` would
            # discard the parent's results, so they are delegated explicitly.
            yield from super().co_iter(other)
            return

        # we're dealing with another contiguous segmentation, things can be much quicker
        if isinstance(other, Segment):
            other_segments = SortedList([other])
        else:
            other_segments = SortedList(other.itersegments())
        my_segments = SortedList(self.itersegments())

        # Two-pointer sweep over both sorted sequences. It relies on the
        # segments on each side not overlapping one another.
        mine = iter(my_segments)
        theirs = iter(other_segments)
        seg_a: Optional[Segment] = next(mine, None)
        seg_b: Optional[Segment] = next(theirs, None)
        # exhausting either side ends the iteration
        while seg_a is not None and seg_b is not None:
            if seg_a.intersects(seg_b):
                yield seg_a, seg_b
            # advance whichever side ends first
            if seg_b.end < seg_a.end:
                seg_b = next(theirs, None)
            else:
                seg_a = next(mine, None)

    @abstractmethod
    def crop(self,
             support: ContiguousSupport,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]:
        # TODO: add errors messages explaining why the support isn't of the right type
        pass

    @abstractmethod
    def bisect(self, at: float):
        pass


class PureSegmentationMixin(metaclass=ABCMeta):
    """A segmentation containing _only_ segments"""

    # TODO: add __and__ (defaults to crop intersection, not in place), that only takes objects of Self type?

    # TODO: can actually take any BaseSegmentation for add & remove

    @abstractmethod
    def crop_iter(self,
                  support: Support,
                  mode: CropMode = 'intersection',
                  returns_mapping: bool = False) \
            -> Iterator[Union[Tuple[Segment, Segment], Segment]]:
        """Abstract. Per the signature, yields either segments or pairs of
        segments; presumably pairs when `returns_mapping` is true (to be
        confirmed against implementations)."""
        pass

    @abstractmethod
    def add(self, segment: Segment):
        """Abstract. Takes a single `Segment`."""
        pass

    @abstractmethod
    def remove(self, segment: Segment):
        """Abstract. Takes a single `Segment`."""
        pass

    # TODO: maybe could be in BaseSegmentation
    @abstractmethod
    def index(self, segment: Segment) -> int:
        """Abstract. Maps a `Segment` to an integer."""
        pass

    # TODO: maybe could be in BaseSegmentation
    @abstractmethod
    def overlapping(self, t: float) -> List[Segment]:
        """Abstract. Maps a float `t` to a list of segments."""
        pass

    @abstractmethod
    def __iter__(self) -> Iterator[Segment]:
        """Abstract. Must return an iterator over `Segment` objects."""
        pass


class AnnotatedSegmentationMixin(metaclass=ABCMeta):
    """Interface for segmentations whose iteration yields
    `(Segment, Label)` pairs."""

    @abstractmethod
    def __iter__(self) -> Iterator[Tuple[Segment, Label]]:
        """Abstract. Must return an iterator over `(Segment, Label)` pairs."""
        pass
Loading