Skip to content

Commit

Permalink
Merge branch 'main' into feature/expected-categories
Browse files Browse the repository at this point in the history
  • Loading branch information
ColdTeapot273K authored Nov 5, 2024
2 parents fcf01f1 + e958fd8 commit 4cd961e
Show file tree
Hide file tree
Showing 166 changed files with 1,366 additions and 660 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ jobs:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4.1.7
with:
name: artifact
path: dist
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Build River
uses: ./.github/actions/install-env
with:
python-version: "3.12.3"
python-version: "3.12"

- name: Install extra Ubuntu dependencies
run: sudo apt-get install graphviz pandoc
Expand Down
32 changes: 17 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
files: river
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
rev: v4.4.0
hooks:
- id: check-json
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- id: mixed-line-ending

- repo: local
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.7
hooks:
# Run the linter.
- id: ruff
name: ruff
language: python
types: [python]
entry: ruff
args:
- --fix
types_or: [python, pyi, jupyter]
args: [--fix]
# Run the formatter.
- id: ruff-format
types_or: [python, pyi, jupyter]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.1.1"
hooks:
- id: mypy
name: mypy
language: python
types: [python]
entry: mypy --implicit-optional
args:
- "--config-file=pyproject.toml"
- "--python-version=3.11"
- "--implicit-optional"
2 changes: 1 addition & 1 deletion docs/introduction/why-use-river.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ River supports different machine learning tasks, including regression, classific

## User experience

River is not the only library allowing you to do online machine learning. But it might just the simplest one to use in the Python ecosystem. River plays nicely with Python dictionaries, therefore making it easy to use in the context of web applications where JSON payloads are aplenty.
River is not the only library allowing you to do online machine learning. But it might just be the simplest one to use in the Python ecosystem. River plays nicely with Python dictionaries, therefore making it easy to use in the context of web applications where JSON payloads are aplenty.
14 changes: 14 additions & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Unreleased

- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display, the behaviour is unchanged.
- The methods `learn_one`, `learn_many`, `update`, `revert`, and `append` now return `None`.

## cluster

Expand All @@ -9,9 +10,22 @@
- Add `render_ascii` in `cluster.ODAC` to render the hierarchical cluster's structure in text format.
- Work with `stats.Var` in `cluster.ODAC` when cluster has only one time series.

## drift

- Make `drift.ADWIN` comply with the reference MOA implementation.

## stats

- Removed the unexported class `stats.CentralMoments`.

## tree

- Instead of letting trees grow indefinitely, setting the `max_depth` parameter to `None` will stop the trees from growing when they reach the system recursion limit.
- Added `tree.LASTClassifier` (Local Adaptive Streaming Tree Classifier).

## stream

- `stream.iter_arff` now supports blank values (treated as missing values).

## preprocessing

Expand Down
1 change: 0 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ plugins:

extra_javascript:
- javascripts/config.js
- https://polyfill.io/v3/polyfill.min.js?features=es6
- https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
- https://cdn.jsdelivr.net/npm/vega@5
- https://cdn.jsdelivr.net/npm/vega-lite@5
Expand Down
113 changes: 57 additions & 56 deletions poetry.lock

Large diffs are not rendered by default.

88 changes: 82 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ pandas = "^2.1"
graphviz = "^0.20.1"
gymnasium = "^0.29.0"
matplotlib = "^3.0.2"
mypy = "^1.6.1"
mypy = "^1.11.1"
pre-commit = "^3.5.0"
pytest = "^7.4.2"
ruff = "^0.1.1"
ruff = "^0.4.10"
scikit-learn = "^1.3.1"
sqlalchemy = "^2.0.22"
sympy = "^1.10.1"
Expand Down Expand Up @@ -117,20 +117,37 @@ markers = [
]

[tool.ruff]
select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
line-length = 100
target-version = 'py310'
extend-include = ["*.ipynb"]

[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# isort
"I",
]
ignore = ["E501"]
fixable = ["ALL"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

[tool.mypy]
files = "river"
strict = true

[[tool.mypy.overrides]]
module = [
"river.*",
"mmh3.*",
"numpy.*",
"sklearn.*",
Expand All @@ -141,6 +158,65 @@ module = [
"vaex.*",
"torch.*",
"sqlalchemy.*",
"requests.*"
"requests.*",
"gymnasium.*",
"sympy.*",
"polars.*"
]
ignore_missing_imports = true

[[tool.mypy.overrides]]
# Disable strict mode for all non fully-typed modules
module = [
"river.base.*",
"river.metrics.*",
"river.utils.*",
"river.stats.*",
"river.optim.*",
"river.datasets.*",
"river.tree.*",
"river.preprocessing.*",
"river.stream.*",
"river.linear_model.*",
"river.evaluate.*",
"river.drift.*",
"river.compose.*",
"river.bandit.*",
"river.cluster.*",
"river.anomaly.*",
"river.time_series.*",
"river.feature_extraction.*",
"river.ensemble.*",
"river.proba.*",
"river.multioutput.*",
"river.naive_bayes.*",
"river.checks.*",
"river.rules.*",
"river.model_selection.*",
"river.forest.*",
"river.neighbors.*",
"river.sketch.*",
"river.facto.*",
"river.covariance.*",
"river.compat.*",
"river.multiclass.*",
"river.reco.*",
"river.imblearn.*",
"river.feature_selection.*",
"river.misc.*",
"river.active.*",
"river.conf.*",
"river.neural_net.*",
"river.test_estimators",
"river.dummy",
]
# The strict option is global, the checks must be disabled one by one
warn_unused_ignores = false
check_untyped_defs = false
allow_subclassing_any = true
allow_any_generics = true
allow_untyped_calls = true
allow_incomplete_defs = true
allow_untyped_defs = true
implicit_reexport = true
warn_return_any = false
1 change: 1 addition & 0 deletions river/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
memory, or simply when it isn't available all at once. river's API is heavily inspired from that of
scikit-learn, enough so that users who are familiar with scikit-learn should feel right at home.
"""

from __future__ import annotations

from .__version__ import __version__ # noqa: F401
1 change: 1 addition & 0 deletions river/active/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Online active learning."""

from __future__ import annotations

from . import base
Expand Down
3 changes: 1 addition & 2 deletions river/active/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def _wrapped_model(self):
return self.classifier

@abc.abstractmethod
def _ask_for_label(self, x, y_pred) -> bool:
...
def _ask_for_label(self, x, y_pred) -> bool: ...

def predict_proba_one(self, x, **kwargs):
"""Predict the probability of each label for `x` and indicate whether a label is needed.
Expand Down
4 changes: 1 addition & 3 deletions river/active/entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ class EntropySampler(ActiveLearningClassifier):
"""

def __init__(
self, classifier: base.Classifier, discount_factor: float = 3, seed=None
):
def __init__(self, classifier: base.Classifier, discount_factor: float = 3, seed=None):
super().__init__(classifier, seed=seed)
self.discount_factor = discount_factor

Expand Down
6 changes: 3 additions & 3 deletions river/anomaly/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def _supervised(self):
return False

@abc.abstractmethod
def learn_one(self, x: dict):
def learn_one(self, x: dict) -> None:
"""Update the model.
Parameters
Expand Down Expand Up @@ -48,7 +48,7 @@ class SupervisedAnomalyDetector(base.Estimator):
"""A supervised anomaly detector."""

@abc.abstractmethod
def learn_one(self, x: dict, y: base.typing.Target):
def learn_one(self, x: dict, y: base.typing.Target) -> None:
"""Update the model.
Parameters
Expand Down Expand Up @@ -137,7 +137,7 @@ def score_one(self, *args, **kwargs):
"""
return self.anomaly_detector.score_one(*args, **kwargs)

def learn_one(self, *args, **learn_kwargs):
def learn_one(self, *args, **learn_kwargs) -> None:
"""Update the anomaly filter and the underlying anomaly detector.
Parameters
Expand Down
7 changes: 2 additions & 5 deletions river/anomaly/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ class ThresholdFilter(anomaly.base.AnomalyFilter):
"""

def __init__(
self, anomaly_detector, threshold: float, protect_anomaly_detector=True
):
def __init__(self, anomaly_detector, threshold: float, protect_anomaly_detector=True):
super().__init__(
anomaly_detector=anomaly_detector,
protect_anomaly_detector=protect_anomaly_detector,
Expand Down Expand Up @@ -188,7 +186,6 @@ def _unit_test_params(cls):
from river import preprocessing

yield {
"anomaly_detector": preprocessing.StandardScaler()
| anomaly.OneClassSVM(nu=0.2),
"anomaly_detector": preprocessing.StandardScaler() | anomaly.OneClassSVM(nu=0.2),
"q": 0.995,
}
8 changes: 2 additions & 6 deletions river/anomaly/pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class PredictiveAnomalyDetection(anomaly.base.SupervisedAnomalyDetector):
>>> for t, (x, y) in enumerate(datasets.AirlinePassengers()):
... score = PAD.score_one(None, y)
... PAD = PAD.learn_one(None, y)
... PAD.learn_one(None, y)
... scores.append(score)
>>> print(scores[-1])
Expand All @@ -100,7 +100,6 @@ def __init__(
n_std: float = 3.0,
warmup_period: int = 0,
):

self.predictive_model = (
predictive_model
if predictive_model is not None
Expand All @@ -123,9 +122,7 @@ def learn_one(self, x: dict | None, y: base.typing.Target | float):
self.iter += 1

# Check whether the model is a time-series forecasting or regression/classification model
if isinstance(
self.predictive_model, time_series.base.Forecaster
) and isinstance(y, float):
if isinstance(self.predictive_model, time_series.base.Forecaster) and isinstance(y, float):
# When there's no data point as dict of features, the target will be passed
# to the forecaster as an exogenous variable.
if not x:
Expand All @@ -134,7 +131,6 @@ def learn_one(self, x: dict | None, y: base.typing.Target | float):
self.predictive_model.learn_one(y=y, x=x)
else:
self.predictive_model.learn_one(x=x, y=y)
return self

def score_one(self, x: dict, y: base.typing.Target):
# Return the predicted value of x from the predictive model, first by checking whether
Expand Down
1 change: 1 addition & 0 deletions river/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""River API module."""

from __future__ import annotations

from . import (
Expand Down
1 change: 1 addition & 0 deletions river/bandit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
(see `model_selection.BanditRegressor`).
"""

from __future__ import annotations

from . import base, datasets, envs
Expand Down
3 changes: 1 addition & 2 deletions river/bandit/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ def __post_init__(self):
)

@abc.abstractmethod
def _pull(self, arm_ids: list[ArmID]) -> ArmID:
...
def _pull(self, arm_ids: list[ArmID]) -> ArmID: ...

def pull(self, arm_ids: list[ArmID]) -> ArmID:
"""Pull arm(s).
Expand Down
6 changes: 3 additions & 3 deletions river/bandit/bayes_ucb.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ class BayesUCB(bandit.base.Policy):

def __init__(self, reward_obj=None, burn_in=0, seed: int | None = None):
super().__init__(reward_obj, burn_in)
self._posteriors: collections.defaultdict[
bandit.base.ArmID, proba.Beta
] = collections.defaultdict(proba.Beta)
self._posteriors: collections.defaultdict[bandit.base.ArmID, proba.Beta] = (
collections.defaultdict(proba.Beta)
)
self.seed = seed
self._rng = random.Random(seed)

Expand Down
4 changes: 2 additions & 2 deletions river/bandit/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@

RIVER_NAMESPACE = "river_bandits"

if (env_id := f"{RIVER_NAMESPACE}/CandyCaneContest-v0") not in gym.envs.registry:
if (env_id := f"{RIVER_NAMESPACE}/CandyCaneContest-v0") not in gym.envs.registration.registry:
gym.envs.registration.register(
id=env_id,
entry_point="river.bandit.envs:CandyCaneContest",
max_episode_steps=CandyCaneContest.n_steps,
)
if (env_id := f"{RIVER_NAMESPACE}/KArmedTestbed-v0") not in gym.envs.registry:
if (env_id := f"{RIVER_NAMESPACE}/KArmedTestbed-v0") not in gym.envs.registration.registry:
gym.envs.registration.register(
id=env_id,
entry_point="river.bandit.envs:KArmedTestbed",
Expand Down
Loading

0 comments on commit 4cd961e

Please sign in to comment.