diff --git a/docs/releases/unreleased.md b/docs/releases/unreleased.md index d7f30c07ac..c6edd98865 100644 --- a/docs/releases/unreleased.md +++ b/docs/releases/unreleased.md @@ -5,6 +5,8 @@ River's mini-batch methods now support pandas v2. In particular, River conforms ## anomaly - Added `anomaly.LocalOutlierFactor`, which is an online version of the LOF algorithm for anomaly detection that matches the scikit-learn implementation. + - Made the `score_one` method of `anomaly.LocalOutlierFactor` stateless. + - Defined a default score of 0 for an uninitialized detector. ## clustering diff --git a/river/anomaly/lof.py b/river/anomaly/lof.py index 631593394c..ea6b3624fb 100644 --- a/river/anomaly/lof.py +++ b/river/anomaly/lof.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import functools import pandas as pd @@ -220,7 +221,26 @@ class LocalOutlierFactor(anomaly.base.AnomalyDetector): ... scores.append(lof.score_one(x)) >>> [round(score, 3) for score in scores] - [1.802, 1.937, 1.567, 1.181, 1.28] + [1.802, 1.936, 1.566, 1.181, 1.272] + + >>> X = [0.5, 0.45, 0.43, 0.44, 0.445, 0.45, 0.0] + >>> lof = anomaly.LocalOutlierFactor() + + >>> for x in X[:3]: + ... lof.learn_one({'x': x}) # Warming up + + >>> for x in X: + ... features = {'x': x} + ... print( + ... f'Anomaly score for x={x:.3f}: {lof.score_one(features):.3f}') + ... 
lof.learn_one(features) + Anomaly score for x=0.500: 0.000 + Anomaly score for x=0.450: 0.000 + Anomaly score for x=0.430: 0.000 + Anomaly score for x=0.440: 1.020 + Anomaly score for x=0.445: 1.032 + Anomaly score for x=0.450: 0.000 + Anomaly score for x=0.000: 0.980 References ---------- @@ -342,10 +362,11 @@ def score_one(self, x: dict): self.x_scores.append(x) self.x_scores, equal = check_equal(self.x_scores, self.x_list) - if len(self.x_scores) == 0: - return None + if len(self.x_scores) == 0 or len(self.x_list) == 0: + return 0.0 x_list_copy = self.x_list.copy() + ( nm, x_list_copy, @@ -359,13 +380,13 @@ def score_one(self, x: dict): ) = expand_objects( self.x_scores, x_list_copy, - self.neighborhoods, - self.rev_neighborhoods, - self.k_dist, - self.reach_dist, - self.dist_dict, - self.local_reach, - self.lof, + self.neighborhoods.copy(), + self.rev_neighborhoods.copy(), + self.k_dist.copy(), + copy.deepcopy(self.reach_dist), + copy.deepcopy(self.dist_dict), + self.local_reach.copy(), + self.lof.copy(), ) neighborhoods, rev_neighborhoods, k_dist, dist_dict = self._initial_calculations( diff --git a/river/anomaly/test_lof.py b/river/anomaly/test_lof.py index 9703b8cdc9..c4933b0091 100644 --- a/river/anomaly/test_lof.py +++ b/river/anomaly/test_lof.py @@ -80,3 +80,33 @@ def test_issue_1328(): X = [{"a": 1, "b": 1}, {"a": 1, "b": 1}] for x in X: lof.learn_one(x) + + +def test_issue_1331(): + import copy + + from river import anomaly + + lof = anomaly.LocalOutlierFactor() + + X = [{"a": 1, "b": 1}, {"a": 1, "b": 1}] + for x in X: + lof.learn_one(x) + + neighborhoods_ = lof.neighborhoods.copy() + rev_neighborhoods = lof.rev_neighborhoods.copy() + k_dist_ = lof.k_dist.copy() + reach_dist_ = copy.deepcopy(lof.reach_dist) + dist_dict_ = copy.deepcopy(lof.dist_dict) + local_reach_ = lof.local_reach.copy() + lof_ = lof.lof.copy() + + lof.score_one({"a": 0.5, "b": 1}) + + assert neighborhoods_ == lof.neighborhoods + assert rev_neighborhoods == lof.rev_neighborhoods + 
assert k_dist_ == lof.k_dist + assert reach_dist_ == lof.reach_dist + assert dist_dict_ == lof.dist_dict + assert local_reach_ == lof.local_reach + assert lof_ == lof.lof