diff --git a/river/drift/teda_cdd.py b/river/drift/teda_cdd.py index 1ac66fef8c..17e64a2477 100644 --- a/river/drift/teda_cdd.py +++ b/river/drift/teda_cdd.py @@ -6,7 +6,61 @@ class TEDACDD(DriftDetector): - """ """ + r"""Concept Drift Detector based on Typicality and Eccentricity Data Analytics (TEDA-CDD) + + Concept Drift Detector based on Typicality and Eccentricity Data Analytics (TEDA-CDD) [^1] is a concept drift detector + based on TEDA [^2], a framework for data analytics that leverages typicality and eccentricity. It employs two models to + monitor the data stream, keeping the information of a previous concept while monitoring the emergence + of a new concept. The models are considered to represent two distinct concepts when the intersection of their data samples + is significantly low, as measured by the Jaccard Index (JI). + + TEDA-CDD has four essential components: reference data model, evolving data model, detection metric, and reset + strategy. The reference model uses the classical definition of TEDA to represent the concept known by the classifier, + disregarding any atypical data sample, whereas the evolving model uses an adaptation to describe the current features + state. + + The model indicates a concept drift when the reference and evolving models are sufficiently distinct. In this context, + a detection strategy based on the JI, considering the concept models as representations of sets, is proposed. Moreover, + when a concept drift is detected, TEDA-CDD is reset to start monitoring the current concept; and, to avoid a cold + restart, the information on the evolving model is used to update the reference model while a new evolving model is + created from scratch. + + Parameters + ---------- + m + Sensitivity parameter [^2]. A higher value of `m` makes it more difficult to encounter atypical data samples, whereas + lower values cause more data samples to be considered atypical.
+ alpha + The forgetting factor, affecting the evolving model as a sensitivity parameter. + The higher the value of `alpha`, the more important a data sample is to the current concept and the more + sensitive the evolving model is to noise. + jaccard_threshold + The Jaccard Threshold (JT). A concept drift occurs if the Jaccard index is lower than this given threshold. + This threshold defines the boundary between similar and dissimilar concepts; the higher the value of JT, the + more sensitive the detector is to divergence. + + Examples + -------- + >>> import random + >>> from river import drift + + >>> rng = random.Random(12345) + >>> teda_cdd = TEDACDD(m=1, alpha=0.95, jaccard_threshold=0.95) + + >>> # Simulate a data stream composed of two data distributions + >>> data_stream = rng.choices([0, 1], k=1000) + rng.choices(range(4, 8), k=1000) + + >>> # Update drift detector and verify if change is detected + >>> for i, val in enumerate(data_stream): + ... teda_cdd.update(val) + + References + ---------- + [^1]: Y. T. P. Nunes, L. A. Guedes. 2024. Concept Drift Detection Based on Typicality and Eccentricity. IEEE Access + 12 (2024), pp. 13795-13808. doi: 10.1109/ACCESS.2024.3355959. + [^2]: P. Angelov. 2014. Anomaly detection based on eccentricity analysis. 2014 IEEE Symposium on Evolving and + Autonomous Learning Systems (EALS), Orlando, FL, USA, pp. 1-8. doi: 10.1109/EALS.2014.7009497. + """ def __init__(self, m=3.0, alpha=0.95, jaccard_threshold=0.8): super().__init__()