From fd11c8991da1cce96524cfc5b960ee50544fd5b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= <hbredin@users.noreply.github.com>
Date: Thu, 9 Jan 2025 16:55:17 +0100
Subject: [PATCH 1/4] chore: use bool instead of np.bool

---
 pyannote/metrics/binary_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyannote/metrics/binary_classification.py b/pyannote/metrics/binary_classification.py
index 1c80738..b8839c1 100644
--- a/pyannote/metrics/binary_classification.py
+++ b/pyannote/metrics/binary_classification.py
@@ -130,7 +130,7 @@ class _Passthrough(BaseEstimator):
 
     def __init__(self):
         super().__init__()
-        self.classes_ = np.array([False, True], dtype=np.bool)
+        self.classes_ = np.array([False, True], dtype=bool)
 
     def fit(self, scores, y_true):
         return self

From 8b8285f5594b63883ba4b127062901a6fdec7abb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= <hbredin@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:00:33 +0100
Subject: [PATCH 2/4] BREAKING: improve diarization purity/coverage

---
 pyannote/metrics/diarization.py | 449 +++++++++++---------------------
 1 file changed, 150 insertions(+), 299 deletions(-)

diff --git a/pyannote/metrics/diarization.py b/pyannote/metrics/diarization.py
index 8bc583d..4e7a176 100755
--- a/pyannote/metrics/diarization.py
+++ b/pyannote/metrics/diarization.py
@@ -3,7 +3,7 @@
 
 # The MIT License (MIT)
 
-# Copyright (c) 2012-2019 CNRS
+# Copyright (c) 2012- CNRS
 
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -44,7 +44,7 @@
     pass
 
 # TODO: can't we put these as class attributes?
-DER_NAME = 'diarization error rate'
+DER_NAME = "diarization error rate"
 
 
 class DiarizationErrorRate(IdentificationErrorRate):
@@ -102,15 +102,16 @@ class DiarizationErrorRate(IdentificationErrorRate):
     def metric_name(cls) -> str:
         return DER_NAME
 
-    def __init__(self, collar: float = 0.0, skip_overlap: bool = False,
-                 **kwargs):
+    def __init__(self, collar: float = 0.0, skip_overlap: bool = False, **kwargs):
         super().__init__(collar=collar, skip_overlap=skip_overlap, **kwargs)
         self.mapper_ = HungarianMapper()
 
-    def optimal_mapping(self,
-                        reference: Annotation,
-                        hypothesis: Annotation,
-                        uem: Optional[Timeline] = None) -> Dict[Label, Label]:
+    def optimal_mapping(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+    ) -> Dict[Label, Label]:
         """Optimal label mapping
 
         Parameters
@@ -136,26 +137,32 @@ def optimal_mapping(self,
         # call hungarian mapper
         return self.mapper_(hypothesis, reference)
 
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
         # crop reference and hypothesis to evaluated regions (uem)
         # remove collars around reference segment boundaries
         # remove overlap regions (if requested)
         reference, hypothesis, uem = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap,
-            returns_uem=True)
+            reference,
+            hypothesis,
+            uem=uem,
+            collar=self.collar,
+            skip_overlap=self.skip_overlap,
+            returns_uem=True,
+        )
         # NOTE that this 'uemification' must be done here because it
         # might have an impact on the search for the optimal mapping.
 
         # make sure reference only contains string labels ('A', 'B', ...)
-        reference = reference.rename_labels(generator='string')
+        reference = reference.rename_labels(generator="string")
 
         # make sure hypothesis only contains integer labels (1, 2, ...)
-        hypothesis = hypothesis.rename_labels(generator='int')
+        hypothesis = hypothesis.rename_labels(generator="int")
 
         # optimal (int --> str) mapping
         mapping = self.optimal_mapping(reference, hypothesis)
@@ -164,10 +171,9 @@ def compute_components(self,
         # NOTE that collar is set to 0.0 because 'uemify' has already
         # been applied (same reason for setting skip_overlap to False)
         mapped = hypothesis.rename_labels(mapping=mapping)
-        return super(DiarizationErrorRate, self) \
-            .compute_components(reference, mapped, uem=uem,
-                                collar=0.0, skip_overlap=False,
-                                **kwargs)
+        return super(DiarizationErrorRate, self).compute_components(
+            reference, mapped, uem=uem, collar=0.0, skip_overlap=False, **kwargs
+        )
 
 
 class GreedyDiarizationErrorRate(IdentificationErrorRate):
@@ -226,13 +232,16 @@ def metric_name(cls):
 
     def __init__(self, collar: float = 0.0, skip_overlap: bool = False, **kwargs):
         super(GreedyDiarizationErrorRate, self).__init__(
-            collar=collar, skip_overlap=skip_overlap, **kwargs)
+            collar=collar, skip_overlap=skip_overlap, **kwargs
+        )
         self.mapper_ = GreedyMapper()
 
-    def greedy_mapping(self,
-                       reference: Annotation,
-                       hypothesis: Annotation,
-                       uem: Optional[Timeline] = None) -> Dict[Label, Label]:
+    def greedy_mapping(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+    ) -> Dict[Label, Label]:
         """Greedy label mapping
 
         Parameters
@@ -252,26 +261,32 @@ def greedy_mapping(self,
             reference, hypothesis = self.uemify(reference, hypothesis, uem=uem)
         return self.mapper_(hypothesis, reference)
 
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
         # crop reference and hypothesis to evaluated regions (uem)
         # remove collars around reference segment boundaries
         # remove overlap regions (if requested)
         reference, hypothesis, uem = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap,
-            returns_uem=True)
+            reference,
+            hypothesis,
+            uem=uem,
+            collar=self.collar,
+            skip_overlap=self.skip_overlap,
+            returns_uem=True,
+        )
         # NOTE that this 'uemification' must be done here because it
         # might have an impact on the search for the greedy mapping.
 
         # make sure reference only contains string labels ('A', 'B', ...)
-        reference = reference.rename_labels(generator='string')
+        reference = reference.rename_labels(generator="string")
 
         # make sure hypothesis only contains integer labels (1, 2, ...)
-        hypothesis = hypothesis.rename_labels(generator='int')
+        hypothesis = hypothesis.rename_labels(generator="int")
 
         # greedy (int --> str) mapping
         mapping = self.greedy_mapping(reference, hypothesis)
@@ -280,15 +295,14 @@ def compute_components(self,
         # NOTE that collar is set to 0.0 because 'uemify' has already
         # been applied (same reason for setting skip_overlap to False)
         mapped = hypothesis.rename_labels(mapping=mapping)
-        return super(GreedyDiarizationErrorRate, self) \
-            .compute_components(reference, mapped, uem=uem,
-                                collar=0.0, skip_overlap=False,
-                                **kwargs)
+        return super(GreedyDiarizationErrorRate, self).compute_components(
+            reference, mapped, uem=uem, collar=0.0, skip_overlap=False, **kwargs
+        )
 
 
-JER_NAME = 'jaccard error rate'
-JER_SPEAKER_ERROR = 'speaker error'
-JER_SPEAKER_COUNT = 'speaker count'
+JER_NAME = "jaccard error rate"
+JER_SPEAKER_ERROR = "speaker error"
+JER_SPEAKER_COUNT = "speaker count"
 
 
 class JaccardErrorRate(DiarizationErrorRate):
@@ -365,31 +379,35 @@ def metric_components(cls) -> MetricComponents:
         ]
 
     def __init__(self, collar=0.0, skip_overlap=False, **kwargs):
-        super().__init__(
-            collar=collar, skip_overlap=skip_overlap, **kwargs)
+        super().__init__(collar=collar, skip_overlap=skip_overlap, **kwargs)
         self.mapper_ = HungarianMapper()
 
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
         # crop reference and hypothesis to evaluated regions (uem)
         # remove collars around reference segment boundaries
         # remove overlap regions (if requested)
         reference, hypothesis, uem = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap,
-            returns_uem=True)
+            reference,
+            hypothesis,
+            uem=uem,
+            collar=self.collar,
+            skip_overlap=self.skip_overlap,
+            returns_uem=True,
+        )
         # NOTE that this 'uemification' must be done here because it
         # might have an impact on the search for the optimal mapping.
 
         # make sure reference only contains string labels ('A', 'B', ...)
-        reference = reference.rename_labels(generator='string')
+        reference = reference.rename_labels(generator="string")
 
         # make sure hypothesis only contains integer labels (1, 2, ...)
-        hypothesis = hypothesis.rename_labels(generator='int')
+        hypothesis = hypothesis.rename_labels(generator="int")
 
         # optimal (str --> int) mapping
         mapping = self.optimal_mapping(hypothesis, reference)
@@ -397,7 +415,6 @@ def compute_components(self,
         detail = self.init_components()
 
         for ref_speaker in reference.labels():
-
             hyp_speaker = mapping.get(ref_speaker, None)
 
             if hyp_speaker is None:
@@ -411,7 +428,7 @@ def compute_components(self,
                 # [miss] is equal to total
 
                 # overall: jer = (fa + miss) / total = (0 + total) / total = 1
-                jer = 1.
+                jer = 1.0
 
             else:
                 # total is the duration of the union of reference and system
@@ -439,9 +456,9 @@ def compute_metric(self, detail: Details) -> float:
         return detail[JER_SPEAKER_ERROR] / detail[JER_SPEAKER_COUNT]
 
 
-PURITY_NAME = 'purity'
-PURITY_TOTAL = 'total'
-PURITY_CORRECT = 'correct'
+PURITY_NAME = "purity"
+PURITY_TOTAL = "total"
+PURITY_CORRECT = "correct"
 
 
 class DiarizationPurity(UEMSupportMixin, BaseMetric):
@@ -449,17 +466,6 @@ class DiarizationPurity(UEMSupportMixin, BaseMetric):
 
     A hypothesized annotation has perfect purity if all of its labels overlap
     only segments which are members of a single reference label.
-
-    Parameters
-    ----------
-    weighted : bool, optional
-        When True (default), each cluster is weighted by its overall duration.
-    collar : float, optional
-        Duration (in seconds) of collars removed from evaluation around
-        boundaries of reference segments.
-    skip_overlap : bool, optional
-        Set to True to not evaluate overlap regions.
-        Defaults to False (i.e. keep overlap regions).
     """
 
     @classmethod
@@ -470,226 +476,64 @@ def metric_name(cls):
     def metric_components(cls):
         return [PURITY_TOTAL, PURITY_CORRECT]
 
-    def __init__(self, collar: float = 0.0, skip_overlap: bool = False,
-                 weighted: bool = True, **kwargs):
-        super(DiarizationPurity, self).__init__(**kwargs)
-        self.weighted = weighted
-        self.collar = collar
-        self.skip_overlap = skip_overlap
-
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
         detail = self.init_components()
 
-        # crop reference and hypothesis to evaluated regions (uem)
-        reference, hypothesis = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap)
-
-        if not reference:
-            return detail
-
-        # cooccurrence matrix
-        matrix = reference * hypothesis
-
-        # duration of largest class in each cluster
-        largest = matrix.max(axis=0)
-        duration = matrix.sum(axis=0)
+        duration = 0
+        largest = 0
 
-        if self.weighted:
-            detail[PURITY_CORRECT] = 0.
-            if np.prod(matrix.shape):
-                detail[PURITY_CORRECT] = largest.sum()
-            detail[PURITY_TOTAL] = duration.sum()
+        for label in hypothesis.labels():
+            support = hypothesis.label_timeline(label)
+            duration += support.duration()
+            chart = reference.crop(support).chart()
+            largest += 0 if not chart else chart[0][1]
 
-        else:
-            detail[PURITY_CORRECT] = (largest / duration).sum()
-            detail[PURITY_TOTAL] = len(largest)
+        detail[PURITY_TOTAL] = duration
+        detail[PURITY_CORRECT] = largest
 
         return detail
 
     def compute_metric(self, detail: Details) -> float:
-        if detail[PURITY_TOTAL] > 0.:
+        if detail[PURITY_TOTAL] > 0.0:
             return detail[PURITY_CORRECT] / detail[PURITY_TOTAL]
-        return 1.
+        return 1.0
 
 
-COVERAGE_NAME = 'coverage'
+COVERAGE_NAME = "coverage"
 
 
 class DiarizationCoverage(DiarizationPurity):
     """Cluster coverage
 
     A hypothesized annotation has perfect coverage if all segments from a
-    given reference label are clustered in the same cluster.
-
-    Parameters
-    ----------
-    weighted : bool, optional
-        When True (default), each cluster is weighted by its overall duration.
-    collar : float, optional
-        Duration (in seconds) of collars removed from evaluation around
-        boundaries of reference segments.
-    skip_overlap : bool, optional
-        Set to True to not evaluate overlap regions.
-        Defaults to False (i.e. keep overlap regions).
+    given reference label are assigned to the same cluster.
     """
 
     @classmethod
     def metric_name(cls):
         return COVERAGE_NAME
 
-    def __init__(self, collar: float = 0.0, skip_overlap: bool = False,
-                 weighted: bool = True, **kwargs):
-        super(DiarizationCoverage, self).__init__(
-            collar=collar, skip_overlap=skip_overlap,
-            weighted=weighted, **kwargs)
-
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-        return super(DiarizationCoverage, self) \
-            .compute_components(hypothesis, reference, uem=uem, **kwargs)
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
+        return super(DiarizationCoverage, self).compute_components(
+            hypothesis, reference, uem=uem, **kwargs
+        )
 
 
-PURITY_COVERAGE_NAME = 'F[purity|coverage]'
-PURITY_COVERAGE_LARGEST_CLASS = 'largest_class'
-PURITY_COVERAGE_TOTAL_CLUSTER = 'total_cluster'
-PURITY_COVERAGE_LARGEST_CLUSTER = 'largest_cluster'
-PURITY_COVERAGE_TOTAL_CLASS = 'total_class'
-
-
-class DiarizationPurityCoverageFMeasure(UEMSupportMixin, BaseMetric):
-    """Compute diarization purity and coverage, and return their F-score.
-
-    Parameters
-    ----------
-    weighted : bool, optional
-        When True (default), each cluster/class is weighted by its overall
-        duration.
-    collar : float, optional
-        Duration (in seconds) of collars removed from evaluation around
-        boundaries of reference segments.
-    skip_overlap : bool, optional
-        Set to True to not evaluate overlap regions.
-        Defaults to False (i.e. keep overlap regions).
-    beta : float, optional
-        When beta > 1, greater importance is given to coverage.
-        When beta < 1, greater importance is given to purity.
-        Defaults to 1.
-
-    See also
-    --------
-    pyannote.metrics.diarization.DiarizationPurity
-    pyannote.metrics.diarization.DiarizationCoverage
-    pyannote.metrics.base.f_measure
-
-    """
-
-    @classmethod
-    def metric_name(cls):
-        return PURITY_COVERAGE_NAME
-
-    @classmethod
-    def metric_components(cls) -> MetricComponents:
-        return [PURITY_COVERAGE_LARGEST_CLASS,
-                PURITY_COVERAGE_TOTAL_CLUSTER,
-                PURITY_COVERAGE_LARGEST_CLUSTER,
-                PURITY_COVERAGE_TOTAL_CLASS]
-
-    def __init__(self, collar: float = 0.0, skip_overlap: bool = False,
-                 weighted: bool = True, beta: float = 1., **kwargs):
-        super(DiarizationPurityCoverageFMeasure, self).__init__(**kwargs)
-        self.collar = collar
-        self.skip_overlap = skip_overlap
-        self.weighted = weighted
-        self.beta = beta
-
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-
-        detail = self.init_components()
-
-        # crop reference and hypothesis to evaluated regions (uem)
-        reference, hypothesis = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap)
-
-        # cooccurrence matrix
-        matrix = reference * hypothesis
-
-        # duration of largest class in each cluster
-        largest_class = matrix.max(axis=0)
-        # duration of clusters
-        duration_cluster = matrix.sum(axis=0)
-
-        # duration of largest cluster in each class
-        largest_cluster = matrix.max(axis=1)
-        # duration of classes
-        duration_class = matrix.sum(axis=1)
-
-        if self.weighted:
-            # compute purity components
-            detail[PURITY_COVERAGE_LARGEST_CLASS] = 0.
-            if np.prod(matrix.shape):
-                detail[PURITY_COVERAGE_LARGEST_CLASS] = largest_class.sum()
-            detail[PURITY_COVERAGE_TOTAL_CLUSTER] = duration_cluster.sum()
-            # compute coverage components
-            detail[PURITY_COVERAGE_LARGEST_CLUSTER] = 0.
-            if np.prod(matrix.shape):
-                detail[PURITY_COVERAGE_LARGEST_CLUSTER] = largest_cluster.sum()
-            detail[PURITY_COVERAGE_TOTAL_CLASS] = duration_class.sum()
-
-        else:
-            # compute purity components
-            detail[PURITY_COVERAGE_LARGEST_CLASS] = (largest_class / duration_cluster).sum()
-            detail[PURITY_COVERAGE_TOTAL_CLUSTER] = len(largest_class)
-            # compute coverage components
-            detail[PURITY_COVERAGE_LARGEST_CLUSTER] = (largest_cluster / duration_class).sum()
-            detail[PURITY_COVERAGE_TOTAL_CLASS] = len(largest_cluster)
-
-        # compute purity
-        detail[PURITY_NAME] = \
-            1. if detail[PURITY_COVERAGE_TOTAL_CLUSTER] == 0. \
-                else detail[PURITY_COVERAGE_LARGEST_CLASS] / detail[PURITY_COVERAGE_TOTAL_CLUSTER]
-        # compute coverage
-        detail[COVERAGE_NAME] = \
-            1. if detail[PURITY_COVERAGE_TOTAL_CLASS] == 0. \
-                else detail[PURITY_COVERAGE_LARGEST_CLUSTER] / detail[PURITY_COVERAGE_TOTAL_CLASS]
-
-        return detail
-
-    def compute_metric(self, detail):
-        _, _, value = self.compute_metrics(detail=detail)
-        return value
-
-    def compute_metrics(self, detail=None):
-
-        detail = self.accumulated_ if detail is None else detail
-
-        purity = \
-            1. if detail[PURITY_COVERAGE_TOTAL_CLUSTER] == 0. \
-                else detail[PURITY_COVERAGE_LARGEST_CLASS] / detail[PURITY_COVERAGE_TOTAL_CLUSTER]
-
-        coverage = \
-            1. if detail[PURITY_COVERAGE_TOTAL_CLASS] == 0. \
-                else detail[PURITY_COVERAGE_LARGEST_CLUSTER] / detail[PURITY_COVERAGE_TOTAL_CLASS]
-
-        return purity, coverage, f_measure(purity, coverage, beta=self.beta)
-
-
-HOMOGENEITY_NAME = 'homogeneity'
-HOMOGENEITY_ENTROPY = 'entropy'
-HOMOGENEITY_CROSS_ENTROPY = 'cross-entropy'
+HOMOGENEITY_NAME = "homogeneity"
+HOMOGENEITY_ENTROPY = "entropy"
+HOMOGENEITY_CROSS_ENTROPY = "cross-entropy"
 
 
 class DiarizationHomogeneity(UEMSupportMixin, BaseMetric):
@@ -714,24 +558,28 @@ def metric_name(cls):
     def metric_components(cls):
         return [HOMOGENEITY_ENTROPY, HOMOGENEITY_CROSS_ENTROPY]
 
-    def __init__(self, collar: float = 0.0, skip_overlap: bool = False,
-                 **kwargs):
+    def __init__(self, collar: float = 0.0, skip_overlap: bool = False, **kwargs):
         super(DiarizationHomogeneity, self).__init__(**kwargs)
         self.collar = collar
         self.skip_overlap = skip_overlap
 
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
         detail = self.init_components()
 
         # crop reference and hypothesis to evaluated regions (uem)
         reference, hypothesis = self.uemify(
-            reference, hypothesis, uem=uem,
-            collar=self.collar, skip_overlap=self.skip_overlap)
+            reference,
+            hypothesis,
+            uem=uem,
+            collar=self.collar,
+            skip_overlap=self.skip_overlap,
+        )
 
         # cooccurrence matrix
         matrix = reference * hypothesis
@@ -741,30 +589,30 @@ def compute_components(self,
         hduration = np.sum(matrix, axis=0)
 
         # reference entropy and reference/hypothesis cross-entropy
-        ratio = np.ma.divide(rduration, duration).filled(0.)
-        detail[HOMOGENEITY_ENTROPY] = \
-            -np.sum(ratio * np.ma.log(ratio).filled(0.))
+        ratio = np.ma.divide(rduration, duration).filled(0.0)
+        detail[HOMOGENEITY_ENTROPY] = -np.sum(ratio * np.ma.log(ratio).filled(0.0))
 
-        ratio = np.ma.divide(matrix, duration).filled(0.)
-        hratio = np.ma.divide(matrix, hduration).filled(0.)
-        detail[HOMOGENEITY_CROSS_ENTROPY] = \
-            -np.sum(ratio * np.ma.log(hratio).filled(0.))
+        ratio = np.ma.divide(matrix, duration).filled(0.0)
+        hratio = np.ma.divide(matrix, hduration).filled(0.0)
+        detail[HOMOGENEITY_CROSS_ENTROPY] = -np.sum(
+            ratio * np.ma.log(hratio).filled(0.0)
+        )
 
         return detail
 
     def compute_metric(self, detail):
-        numerator = 1. * detail[HOMOGENEITY_CROSS_ENTROPY]
-        denominator = 1. * detail[HOMOGENEITY_ENTROPY]
-        if denominator == 0.:
+        numerator = 1.0 * detail[HOMOGENEITY_CROSS_ENTROPY]
+        denominator = 1.0 * detail[HOMOGENEITY_ENTROPY]
+        if denominator == 0.0:
             if numerator == 0:
-                return 1.
+                return 1.0
             else:
-                return 0.
+                return 0.0
         else:
-            return 1. - numerator / denominator
+            return 1.0 - numerator / denominator
 
 
-COMPLETENESS_NAME = 'completeness'
+COMPLETENESS_NAME = "completeness"
 
 
 class DiarizationCompleteness(DiarizationHomogeneity):
@@ -785,10 +633,13 @@ class DiarizationCompleteness(DiarizationHomogeneity):
     def metric_name(cls):
         return COMPLETENESS_NAME
 
-    def compute_components(self,
-                           reference: Annotation,
-                           hypothesis: Annotation,
-                           uem: Optional[Timeline] = None,
-                           **kwargs) -> Details:
-        return super(DiarizationCompleteness, self) \
-            .compute_components(hypothesis, reference, uem=uem, **kwargs)
+    def compute_components(
+        self,
+        reference: Annotation,
+        hypothesis: Annotation,
+        uem: Optional[Timeline] = None,
+        **kwargs
+    ) -> Details:
+        return super(DiarizationCompleteness, self).compute_components(
+            hypothesis, reference, uem=uem, **kwargs
+        )

From f7c74bd1fd366cebf7d33f016e2579c750b39af4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= <hbredin@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:02:14 +0100
Subject: [PATCH 3/4] doc: update changelog

---
 docs/source/changelog.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 8bbbc4e..3e259d1 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -2,6 +2,11 @@
 Changelog
 #########
 
+develop
+~~~~~~~
+
+- BREAKING: improve diarization purity and coverage to account for overlapping regions
+
 Version 3.2.1 (2022-06-20)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

From 798c0b8323cf704d49402ac7243cd1e76cc83929 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= <hbredin@users.noreply.github.com>
Date: Sun, 12 Jan 2025 17:47:38 +0100
Subject: [PATCH 4/4] doc: update changelog

---
 docs/source/changelog.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 3e259d1..45483ba 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -2,10 +2,11 @@
 Changelog
 #########
 
-develop
-~~~~~~~
+Version 3.3.0 (2025-01-12)
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 - BREAKING: improve diarization purity and coverage to account for overlapping regions
+- chore: use ``bool`` instead of deprecated ``np.bool``
 
 Version 3.2.1 (2022-06-20)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~