Improvements

eth-easl · Sep 12, 2024 · 4dddd24 · 4dddd24
1 parent bf1ba09
commit 4dddd24
Show file tree

Hide file tree

Showing 11 changed files with 109 additions and 96 deletions.
diff --git a/docs/pipeline/triggers/DRIFT_TRIGGER.md b/docs/pipeline/triggers/DRIFT_TRIGGER.md
@@ -197,8 +197,8 @@ classDiagram
         int window_size = 10
     }
 
-    class DynamicPercentileThresholdCriterion {
-        float percentile = 0.05
+    class DynamicQuantileThresholdCriterion {
+        float quantile = 0.05
     }
 
     class DynamicRollingAverageThresholdCriterion {
@@ -209,7 +209,7 @@ classDiagram
     DriftDecisionCriterion <|-- ThresholdDecisionCriterion
     DriftDecisionCriterion <|-- DynamicThresholdCriterion
 
-    DynamicThresholdCriterion <|-- DynamicPercentileThresholdCriterion
+    DynamicThresholdCriterion <|-- DynamicQuantileThresholdCriterion
     DynamicThresholdCriterion <|-- DynamicRollingAverageThresholdCriterion
 ```
 
@@ -252,8 +252,8 @@ classDiagram
         +Deque~float~ score_observations
     }
 
-    class DynamicPercentileThresholdPolicy {
-        +DynamicPercentileThresholdCriterion config
+    class DynamicQuantileThresholdPolicy {
+        +DynamicQuantileThresholdCriterion config
         +bool evaluate_decision(float distance)
     }
 
@@ -269,7 +269,7 @@ classDiagram
 
     DriftDecisionPolicy <|-- ThresholdDecisionPolicy
     DriftDecisionPolicy <|-- DynamicDecisionPolicy
-    DynamicDecisionPolicy <|-- DynamicPercentileThresholdPolicy
+    DynamicDecisionPolicy <|-- DynamicQuantileThresholdPolicy
     DynamicDecisionPolicy <|-- DynamicRollingAverageThresholdPolicy
     DriftDecisionPolicy <|-- HypothesisTestDecisionPolicy
 

diff --git a/experiments/yearbook/compare_trigger_policies/run.py b/experiments/yearbook/compare_trigger_policies/run.py
@@ -31,7 +31,7 @@
     AlibiDetectMmdDriftMetric,
 )
 from modyn.config.schema.pipeline.trigger.drift.criterion import (
-    DynamicPercentileThresholdCriterion,
+    DynamicQuantileThresholdCriterion,
     DynamicRollingAverageThresholdCriterion,
     ThresholdDecisionCriterion,
 )
@@ -227,16 +227,16 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
             for window_size in [5]
             for criterion_name, criterion in (
                 {
-                    f"mmd-perc-{percentile}-{window_size}": DynamicPercentileThresholdCriterion(
-                        window_size=window_size, percentile=percentile
+                    f"mmd-perc-{quantile}-{window_size}": DynamicQuantileThresholdCriterion(
+                        window_size=window_size, quantile=quantile
                     )
-                    for percentile in [0.05, 0.1, 0.2, 0.3]
+                    for quantile in [0.05, 0.1, 0.2, 0.3]
                     for window_size in [15]  # TODO [10, 20, 30]
                 }
                 | {
                     f"mmd-rollavg-{deviation}-{window_size}": DynamicRollingAverageThresholdCriterion(
                         window_size=window_size, deviation=deviation, absolute=False
-                    )  # TODO: avg / percentile
+                    )  # TODO: avg / quantile
                     for deviation in [0.025, 0.05, 0.1, 0.2, 0.3]
                     for window_size in [15]  # TODO [10, 20, 30]
                 }
@@ -293,7 +293,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                         expected_accuracy=0.9,  # TODO: variable
                         allow_reduction=allow_reduction,
                         avoidable_misclassification_threshold=num_misclassifications,
-                    )  # TODO: avg / percentile
+                    )  # TODO: avg / quantile
                     for num_misclassifications in [100, 200, 500, 1000, 2000, 5000]
                     for allow_reduction in [True, False]
                 }

diff --git a/modyn/config/schema/pipeline/trigger/drift/criterion.py b/modyn/config/schema/pipeline/trigger/drift/criterion.py
@@ -16,14 +16,20 @@ class _DynamicThresholdCriterion(ModynBaseModel):
     needs_calibration: Literal[True] = Field(True)
 
 
-class DynamicPercentileThresholdCriterion(_DynamicThresholdCriterion):
-    """Dynamic threshold based on a extremeness percentile of the previous
+class DynamicQuantileThresholdCriterion(_DynamicThresholdCriterion):
+    """Dynamic threshold based on an extremeness quantile of the previous
     distance values."""
 
-    id: Literal["DynamicPercentileThresholdCriterion"] = "DynamicPercentileThresholdCriterion"
-    percentile: float = Field(
+    id: Literal["DynamicQuantileThresholdCriterion"] = "DynamicQuantileThresholdCriterion"
+    quantile: float = Field(
         0.05,
-        description="The percentile that a threshold has to be in to trigger a drift event.",
+        description=(
+            "The quantile that a threshold has to be in to trigger a drift event. "
+            "0.05 will only trigger in the most extreme 5% of cases. Hence the triggering "
+            "threshold is more extreme than 95% of the previous values."
+        ),
+        min=0.0,
+        max=1.0,
     )
 
 
@@ -42,9 +48,9 @@ class DynamicRollingAverageThresholdCriterion(_DynamicThresholdCriterion):
     )
 
 
-DynamicThresholdCriterion = DynamicPercentileThresholdCriterion | DynamicRollingAverageThresholdCriterion
+DynamicThresholdCriterion = DynamicQuantileThresholdCriterion | DynamicRollingAverageThresholdCriterion
 
 DriftDecisionCriterion = Annotated[
-    ThresholdDecisionCriterion | DynamicPercentileThresholdCriterion | DynamicRollingAverageThresholdCriterion,
+    ThresholdDecisionCriterion | DynamicQuantileThresholdCriterion | DynamicRollingAverageThresholdCriterion,
     Field(discriminator="id"),
 ]
diff --git a/modyn/config/schema/pipeline/trigger/performance/criterion.py b/modyn/config/schema/pipeline/trigger/performance/criterion.py
@@ -36,16 +36,18 @@ class _DynamicPerformanceThresholdCriterion(_PerformanceThresholdCriterion):
     needs_calibration: Literal[True] = Field(True)
 
 
-class DynamicPercentilePerformanceThresholdCriterion(_DynamicPerformanceThresholdCriterion):
-    """Dynamic threshold based on a extremeness percentile of the previous
+class DynamicQuantilePerformanceThresholdCriterion(_DynamicPerformanceThresholdCriterion):
+    """Dynamic threshold based on an extremeness quantile of the previous
     distance values."""
 
-    id: Literal["DynamicPercentilePerformanceThresholdCriterion"] = Field(
-        "DynamicPercentilePerformanceThresholdCriterion"
-    )
-    percentile: float = Field(
+    id: Literal["DynamicQuantilePerformanceThresholdCriterion"] = Field("DynamicQuantilePerformanceThresholdCriterion")
+    quantile: float = Field(
         0.05,
-        description="The percentile that a threshold has to be in to trigger a drift event.",
+        description=(
+            "The quantile that a threshold has to trigger. "
+            "0.05 will only trigger in the most extreme 5% of cases. Hence the triggering "
+            "threshold is more extreme than 95% of the previous values."
+        ),
     )
 
 
@@ -116,7 +118,7 @@ class StaticNumberAvoidableMisclassificationCriterion(_NumberAvoidableMisclassif
 
 PerformanceTriggerCriterion = Annotated[
     StaticPerformanceThresholdCriterion
-    | DynamicPercentilePerformanceThresholdCriterion
+    | DynamicQuantilePerformanceThresholdCriterion
     | DynamicRollingAveragePerformanceThresholdCriterion
     | StaticNumberAvoidableMisclassificationCriterion,
     Field(discriminator="id"),

diff --git a/modyn/supervisor/internal/triggers/datadrifttrigger.py b/modyn/supervisor/internal/triggers/datadrifttrigger.py
@@ -7,7 +7,7 @@
 
 from modyn.config.schema.pipeline import DataDriftTriggerConfig
 from modyn.config.schema.pipeline.trigger.drift.criterion import (
-    DynamicPercentileThresholdCriterion,
+    DynamicQuantileThresholdCriterion,
     DynamicRollingAverageThresholdCriterion,
     ThresholdDecisionCriterion,
 )
@@ -42,7 +42,7 @@
 )
 from modyn.supervisor.internal.triggers.utils.decision_policy import (
     DecisionPolicy,
-    DynamicPercentileThresholdPolicy,
+    DynamicQuantileThresholdPolicy,
     DynamicRollingAverageThresholdPolicy,
     StaticThresholdDecisionPolicy,
 )
@@ -366,10 +366,10 @@ def _setup_decision_policies(
             policies[metric_name] = StaticThresholdDecisionPolicy(
                 threshold=criterion.threshold, triggering_direction="higher"
             )
-        elif isinstance(criterion, DynamicPercentileThresholdCriterion):
-            policies[metric_name] = DynamicPercentileThresholdPolicy(
+        elif isinstance(criterion, DynamicQuantileThresholdCriterion):
+            policies[metric_name] = DynamicQuantileThresholdPolicy(
                 window_size=criterion.window_size,
-                percentile=criterion.percentile,
+                quantile=criterion.quantile,
                 triggering_direction="higher",
             )
         elif isinstance(criterion, DynamicRollingAverageThresholdCriterion):

diff --git a/modyn/supervisor/internal/triggers/performance/decision_policy.py b/modyn/supervisor/internal/triggers/performance/decision_policy.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 
 from modyn.config.schema.pipeline.trigger.performance.criterion import (
-    DynamicPercentilePerformanceThresholdCriterion,
+    DynamicQuantilePerformanceThresholdCriterion,
     DynamicRollingAveragePerformanceThresholdCriterion,
     StaticNumberAvoidableMisclassificationCriterion,
     StaticPerformanceThresholdCriterion,
@@ -17,7 +17,7 @@
     PerformanceTracker,
 )
 from modyn.supervisor.internal.triggers.utils.decision_policy import (
-    DynamicPercentileThresholdPolicy,
+    DynamicQuantileThresholdPolicy,
     DynamicRollingAverageThresholdPolicy,
     StaticThresholdDecisionPolicy,
 )
@@ -79,17 +79,17 @@ def evaluate_decision(
         return self._wrapped.evaluate_decision(measurement=evaluation_scores[self.metric])
 
 
-class DynamicPerformancePercentileThresholdPolicy(PerformanceDecisionPolicy):
+class DynamicPerformanceQuantileThresholdPolicy(PerformanceDecisionPolicy):
     """Wrapper for DynamicRollingAverageThresholdPolicy.
 
-    Triggers if value is in the lower percentile of the rolling window.
+    Triggers if value is in the lower quantile of the rolling window.
     """
 
-    def __init__(self, config: DynamicPercentilePerformanceThresholdCriterion):
+    def __init__(self, config: DynamicQuantilePerformanceThresholdCriterion):
         self.metric = config.metric
-        self._wrapped = DynamicPercentileThresholdPolicy(
+        self._wrapped = DynamicQuantileThresholdPolicy(
             window_size=config.window_size,
-            percentile=config.percentile,
+            quantile=config.quantile,
             triggering_direction="lower",
         )
 

diff --git a/modyn/supervisor/internal/triggers/performancetrigger.py b/modyn/supervisor/internal/triggers/performancetrigger.py
@@ -5,7 +5,7 @@
 from typing_extensions import override
 
 from modyn.config.schema.pipeline.trigger.performance.criterion import (
-    DynamicPercentilePerformanceThresholdCriterion,
+    DynamicQuantilePerformanceThresholdCriterion,
     DynamicRollingAveragePerformanceThresholdCriterion,
     StaticNumberAvoidableMisclassificationCriterion,
     StaticPerformanceThresholdCriterion,
@@ -15,7 +15,7 @@
 )
 from modyn.supervisor.internal.triggers.batchedtrigger import BatchedTrigger
 from modyn.supervisor.internal.triggers.performance.decision_policy import (
-    DynamicPerformancePercentileThresholdPolicy,
+    DynamicPerformanceQuantileThresholdPolicy,
     DynamicPerformanceRollingAverageThresholdPolicy,
     PerformanceDecisionPolicy,
     StaticNumberAvoidableMisclassificationDecisionPolicy,
@@ -157,8 +157,8 @@ def _setup_decision_policies(
     for name, criterion in config.decision_criteria.items():
         if isinstance(criterion, StaticPerformanceThresholdCriterion):
             policies[name] = StaticPerformanceThresholdDecisionPolicy(criterion)
-        elif isinstance(criterion, DynamicPercentilePerformanceThresholdCriterion):
-            policies[name] = DynamicPerformancePercentileThresholdPolicy(criterion)
+        elif isinstance(criterion, DynamicQuantilePerformanceThresholdCriterion):
+            policies[name] = DynamicPerformanceQuantileThresholdPolicy(criterion)
         elif isinstance(criterion, DynamicRollingAveragePerformanceThresholdCriterion):
             policies[name] = DynamicPerformanceRollingAverageThresholdPolicy(criterion)
         elif isinstance(criterion, StaticNumberAvoidableMisclassificationCriterion):

diff --git a/modyn/supervisor/internal/triggers/utils/decision_policy.py b/modyn/supervisor/internal/triggers/utils/decision_policy.py
@@ -2,6 +2,8 @@
 from collections import deque
 from typing import Literal
 
+import numpy as np
+
 
 class DecisionPolicy(ABC):
     """Decision policy that will make the binary triggering decisions based on
@@ -66,50 +68,51 @@ def __init__(self, window_size: int, triggering_direction: Literal["higher", "lo
         self.score_observations: deque = deque(maxlen=window_size)
 
 
-class DynamicPercentileThresholdPolicy(DynamicDecisionPolicy):
-    """Dynamic threshold based on a extremeness percentile of the previous
+class DynamicQuantileThresholdPolicy(DynamicDecisionPolicy):
+    """Dynamic threshold based on an extremeness quantile of the previous
     measurement values.
 
     We compare a new measurement value with the series of previous measurement values
-    and decide if it's more extreme than a certain percentile of the series. Therefore we count the
+    and decide if it's more extreme than a certain quantile of the series. Therefore we count the
     `num_more_extreme` values that are greater than the new measurement and compare it with the
-    `percentile` threshold.
+    `quantile` threshold.
     """
 
     def __init__(
         self,
         window_size: int,
-        percentile: float,
+        quantile: float,
         triggering_direction: Literal["higher", "lower"],
     ):
         """
         Args:
             window_size: The size of the observations to be considered for the decision.
-            percentile: The percentile that a threshold has to be in to trigger event.
+            quantile: The extremeness quantile that a measurement has to be in to trigger an event.
             triggering_direction: Whether a higher score should produce a trigger or a lower score.
         """
         super().__init__(window_size, triggering_direction)
-        self.percentile = percentile
+        self.quantile = quantile
 
     def evaluate_decision(self, measurement: float) -> bool:
         if len(self.score_observations) == 0:
             self.score_observations.append(measurement)
             return True
 
-        sorted_observations = list(sorted(self.score_observations, reverse=self.triggering_direction == "lower"))
-
-        threshold = sorted_observations[
-            min(
-                max(
-                    0,
-                    int(round(len(sorted_observations) * (1.0 - self.percentile))) - 1,  # from length to index space
-                ),
-                len(sorted_observations) - 1,
-            )
-        ]
+        # let's linearly interpolate to find the most extreme self.quantile value
+        quantile_threshold = (
+            # direction: higher --> higher is worse and invokes a trigger
+            # most extreme 5% --> numpy quantile 0.95
+            1 - self.quantile
+            if self.triggering_direction == "higher"
+            # direction: lower --> lower is worse and invokes a trigger
+            # most extreme 5% --> numpy quantile 0.05
+            else self.quantile
+        )
+        threshold = float(np.quantile(a=self.score_observations, q=quantile_threshold, method="linear"))
+
         self.score_observations.append(measurement)
 
-        return measurement >= threshold if self.triggering_direction == "higher" else measurement <= threshold
+        return measurement > threshold if self.triggering_direction == "higher" else measurement < threshold
 
 
 class DynamicRollingAverageThresholdPolicy(DynamicDecisionPolicy):

diff --git a/modyn/tests/supervisor/internal/triggers/drift/test_evidently_detector.py b/modyn/tests/supervisor/internal/triggers/drift/test_evidently_detector.py
@@ -8,7 +8,7 @@
     EvidentlySimpleDistanceDriftMetric,
 )
 from modyn.config.schema.pipeline.trigger.drift.criterion import (
-    DynamicPercentileThresholdCriterion,
+    DynamicQuantileThresholdCriterion,
 )
 from modyn.config.schema.pipeline.trigger.drift.evidently import (
     EvidentlyHellingerDistanceDriftMetric,
@@ -41,26 +41,26 @@ def df_data_cur(data_cur: np.ndarray) -> pd.DataFrame:
 
 @pytest.fixture
 def model_drift_metric() -> EvidentlyModelDriftMetric:
-    return EvidentlyModelDriftMetric(bootstrap=False, decision_criterion=DynamicPercentileThresholdCriterion())
+    return EvidentlyModelDriftMetric(bootstrap=False, decision_criterion=DynamicQuantileThresholdCriterion())
 
 
 @pytest.fixture
 def ratio_drift_metric() -> EvidentlyRatioDriftMetric:
-    return EvidentlyRatioDriftMetric(decision_criterion=DynamicPercentileThresholdCriterion())
+    return EvidentlyRatioDriftMetric(decision_criterion=DynamicQuantileThresholdCriterion())
 
 
 @pytest.fixture
 def simple_distance_drift_metric() -> EvidentlySimpleDistanceDriftMetric:
     return EvidentlySimpleDistanceDriftMetric(
         bootstrap=False,
         distance_metric="euclidean",
-        decision_criterion=DynamicPercentileThresholdCriterion(),
+        decision_criterion=DynamicQuantileThresholdCriterion(),
     )
 
 
 @pytest.fixture
 def hellinger_distance_drift_metric() -> EvidentlySimpleDistanceDriftMetric:
-    return EvidentlyHellingerDistanceDriftMetric(decision_criterion=DynamicPercentileThresholdCriterion())
+    return EvidentlyHellingerDistanceDriftMetric(decision_criterion=DynamicQuantileThresholdCriterion())
 
 
 def test_evidently_additional_metric_computation_hellinger(

diff --git a/modyn/tests/supervisor/internal/triggers/test_datadrifttrigger.py b/modyn/tests/supervisor/internal/triggers/test_datadrifttrigger.py
@@ -13,7 +13,7 @@
 )
 from modyn.config.schema.pipeline.trigger.drift.config import AmountWindowingStrategy
 from modyn.config.schema.pipeline.trigger.drift.criterion import (
-    DynamicPercentileThresholdCriterion,
+    DynamicQuantileThresholdCriterion,
     ThresholdDecisionCriterion,
 )
 from modyn.config.schema.pipeline.trigger.drift.detection_window import (
@@ -271,7 +271,7 @@ def test_warmup_trigger(mock_drift_trigger: DataDriftTrigger) -> None:
         evaluation_interval_data_points=5,
         metrics={
             "mmd": AlibiDetectMmdDriftMetric(
-                decision_criterion=DynamicPercentileThresholdCriterion(percentile=50, window_size=3),
+                decision_criterion=DynamicQuantileThresholdCriterion(quantile=50, window_size=3),
             )
         },
         aggregation_strategy=MajorityVoteDriftAggregationStrategy(),