Skip to content

Commit

Permalink
Limit tree growth to the system recursion limit
Browse files Browse the repository at this point in the history
  • Loading branch information
e10e3 committed Jul 26, 2024
1 parent bb77e55 commit 62137b0
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 12 deletions.
4 changes: 4 additions & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Unreleased

- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display; the behaviour is unchanged.

## tree

- Instead of letting trees grow indefinitely, setting the `max_depth` parameter to `None` now caps the tree depth just below the system recursion limit (`sys.getrecursionlimit() - 20`), so trees stop growing before recursive traversals can exceed it.
4 changes: 2 additions & 2 deletions river/forest/adaptive_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ class ARFClassifier(BaseForest, base.Classifier):
split attempts.
max_depth
[*Tree parameter*] The maximum depth a tree can reach. If `None`, the
tree will grow indefinitely.
tree will grow until the system recursion limit.
split_criterion
[*Tree parameter*] Split criterion to use.<br/>
- 'gini' - Gini<br/>
Expand Down Expand Up @@ -767,7 +767,7 @@ class ARFRegressor(BaseForest, base.Regressor):
split attempts.
max_depth
[*Tree parameter*] The maximum depth a tree can reach. If `None`, the
tree will grow indefinitely.
tree will grow until the system recursion limit.
delta
[*Tree parameter*] Allowed error in split decision, a value closer to 0
takes longer to decide.
Expand Down
3 changes: 2 additions & 1 deletion river/tree/extremely_fast_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class ExtremelyFastDecisionTreeClassifier(HoeffdingTreeClassifier):
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
min_samples_reevaluate
Number of instances a node should observe before reevaluating the best split.
split_criterion
Expand Down
3 changes: 2 additions & 1 deletion river/tree/hoeffding_adaptive_tree_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class HoeffdingAdaptiveTreeClassifier(HoeffdingTreeClassifier):
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
split_criterion
Split criterion to use.<br/>
- 'gini' - Gini<br/>
Expand Down
3 changes: 2 additions & 1 deletion river/tree/hoeffding_adaptive_tree_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class HoeffdingAdaptiveTreeRegressor(HoeffdingTreeRegressor):
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
delta
Significance level to calculate the Hoeffding bound. The significance level is given by
`1 - delta`. Values closer to zero imply longer split decision delays.
Expand Down
6 changes: 4 additions & 2 deletions river/tree/hoeffding_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import functools
import io
import math
import sys
from abc import ABC, abstractmethod

from river import base
Expand All @@ -30,7 +31,8 @@ class HoeffdingTree(ABC):
Parameters
----------
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
binary_split
If True, only allow binary splits.
max_size
Expand Down Expand Up @@ -60,7 +62,7 @@ def __init__(
self._split_criterion: str = ""
self._leaf_prediction: str = ""

self.max_depth: float = max_depth if max_depth is not None else math.inf
self.max_depth: int = max_depth if max_depth is not None else (sys.getrecursionlimit() - 20)
self.binary_split: bool = binary_split
self._max_size: float = max_size
self._max_byte_size: float = self._max_size * (2**20) # convert to byte
Expand Down
3 changes: 2 additions & 1 deletion river/tree/hoeffding_tree_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class HoeffdingTreeClassifier(HoeffdingTree, base.Classifier):
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
split_criterion
Split criterion to use.<br/>
- 'gini' - Gini<br/>
Expand Down
3 changes: 2 additions & 1 deletion river/tree/hoeffding_tree_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class HoeffdingTreeRegressor(HoeffdingTree, base.Regressor):
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
delta
Significance level to calculate the Hoeffding bound. The significance level is given by
`1 - delta`. Values closer to zero imply longer split decision delays.
Expand Down
3 changes: 2 additions & 1 deletion river/tree/isoup_tree_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class iSOUPTreeRegressor(tree.HoeffdingTreeRegressor, base.MultiTargetRegressor)
grace_period
Number of instances a leaf should observe between split attempts.
max_depth
The maximum depth a tree can reach. If `None`, the tree will grow indefinitely.
The maximum depth a tree can reach. If `None`, the tree will grow until
the system recursion limit.
delta
Allowed error in split decision, a value closer to 0 takes longer to
decide.
Expand Down
4 changes: 2 additions & 2 deletions river/tree/stochastic_gradient_tree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import abc
import math
import sys

from scipy.stats import f as f_dist

Expand Down Expand Up @@ -38,7 +38,7 @@ def __init__(
self.delta = delta
self.grace_period = grace_period
self.init_pred = init_pred
self.max_depth = max_depth if max_depth else math.inf
self.max_depth = max_depth if max_depth else (sys.getrecursionlimit() - 20)

if lambda_value < 0.0:
raise ValueError('Invalid value: "lambda_value" must be positive.')
Expand Down

0 comments on commit 62137b0

Please sign in to comment.