From c8ae7687249c4ee6a9930c7a50a552e79eb69a86 Mon Sep 17 00:00:00 2001
From: Oliver Borchert <me@borchero.com>
Date: Sun, 14 Jan 2024 18:08:43 +0100
Subject: [PATCH 01/10] [python-package] Allow passing early stopping min delta
 via params

---
 python-package/lightgbm/engine.py        |  2 ++
 tests/python_package_test/test_engine.py | 14 ++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 822aa3b35017..ac0aeabc2c26 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -237,6 +237,7 @@ def train(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                min_delta=params.get("early_stopping_min_delta", 0.0),
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,
@@ -737,6 +738,7 @@ def cv(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                min_delta=params.get("early_stopping_min_delta", 0.0),
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index e355e5ab074a..e4d7b48861dd 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -981,8 +981,11 @@ def train_fn():
         assert bst.best_iteration == 0
 
 
-@pytest.mark.parametrize('first_metric_only', [True, False])
-def test_early_stopping_via_global_params(first_metric_only):
+@pytest.mark.parametrize(
+    ('first_metric_only', 'early_stopping_min_delta'),
+    [(True, 0.0), (True, 1e3), (False, 0.0)]
+)
+def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_delta):
     X, y = load_breast_cancer(return_X_y=True)
     num_trees = 5
     params = {
@@ -991,7 +994,8 @@ def test_early_stopping_via_global_params(first_metric_only):
         'metric': 'None',
         'verbose': -1,
         'early_stopping_round': 2,
-        'first_metric_only': first_metric_only
+        'first_metric_only': first_metric_only,
+        'early_stopping_min_delta': early_stopping_min_delta,
     }
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
@@ -1002,8 +1006,10 @@ def test_early_stopping_via_global_params(first_metric_only):
                     feval=[decreasing_metric, constant_metric],
                     valid_sets=lgb_eval,
                     valid_names=valid_set_name)
-    if first_metric_only:
+    if first_metric_only and early_stopping_min_delta == 0:
         assert gbm.best_iteration == num_trees
+    elif first_metric_only:
+        assert gbm.best_iteration == 2
     else:
         assert gbm.best_iteration == 1
     assert valid_set_name in gbm.best_score

From 9c9f540b538fe6ebd9fb0cd5bed2108228e2ca7f Mon Sep 17 00:00:00 2001
From: Oliver Borchert <me@borchero.com>
Date: Sun, 14 Jan 2024 18:35:53 +0100
Subject: [PATCH 02/10] Fix expected best_iteration in early stopping min delta test

---
 tests/python_package_test/test_engine.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index e4d7b48861dd..44f44ed76481 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1008,8 +1008,6 @@ def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_
                     valid_names=valid_set_name)
     if first_metric_only and early_stopping_min_delta == 0:
         assert gbm.best_iteration == num_trees
-    elif first_metric_only:
-        assert gbm.best_iteration == 2
     else:
         assert gbm.best_iteration == 1
     assert valid_set_name in gbm.best_score

From a3f584aa43112768801ca7f3f7b294f05da1de0c Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Thu, 15 Feb 2024 13:06:33 +0100
Subject: [PATCH 03/10] Add separate test

---
 tests/python_package_test/test_engine.py | 39 +++++++++++++++++++-----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 44f44ed76481..4d521319e4c4 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -981,11 +981,8 @@ def train_fn():
         assert bst.best_iteration == 0
 
 
-@pytest.mark.parametrize(
-    ('first_metric_only', 'early_stopping_min_delta'),
-    [(True, 0.0), (True, 1e3), (False, 0.0)]
-)
-def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_delta):
+@pytest.mark.parametrize('first_metric_only', [True, False])
+def test_early_stopping_via_global_params(first_metric_only):
     X, y = load_breast_cancer(return_X_y=True)
     num_trees = 5
     params = {
@@ -994,8 +991,7 @@ def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_
         'metric': 'None',
         'verbose': -1,
         'early_stopping_round': 2,
-        'first_metric_only': first_metric_only,
-        'early_stopping_min_delta': early_stopping_min_delta,
+        'first_metric_only': first_metric_only
     }
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
@@ -1006,7 +1002,7 @@ def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_
                     feval=[decreasing_metric, constant_metric],
                     valid_sets=lgb_eval,
                     valid_names=valid_set_name)
-    if first_metric_only and early_stopping_min_delta == 0:
+    if first_metric_only:
         assert gbm.best_iteration == num_trees
     else:
         assert gbm.best_iteration == 1
@@ -1015,6 +1011,33 @@ def test_early_stopping_via_global_params(first_metric_only, early_stopping_min_
     assert 'error' in gbm.best_score[valid_set_name]
 
 
+@pytest.mark.parametrize('early_stopping_min_delta', [1e-3, 0.0])
+def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
+    X, y = load_breast_cancer(return_X_y=True)
+    num_trees = 5
+    params = {
+        'num_trees': num_trees,
+        'objective': 'binary',
+        'metric': 'None',
+        'verbose': -1,
+        'early_stopping_round': 2,
+        'early_stopping_min_delta': early_stopping_min_delta,
+    }
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    valid_set_name = 'valid_set'
+    gbm = lgb.train(params,
+                    lgb_train,
+                    feval=decreasing_metric,
+                    valid_sets=lgb_eval,
+                    valid_names=valid_set_name)
+    if early_stopping_min_delta == 0:
+        assert gbm.best_iteration == num_trees
+    else:
+        assert gbm.best_iteration == 1
+
+
 @pytest.mark.parametrize('first_only', [True, False])
 @pytest.mark.parametrize('single_metric', [True, False])
 @pytest.mark.parametrize('greater_is_better', [True, False])

From 125e9f5e86e3e2b4d78f57494672f9d8de8679d3 Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Mon, 18 Mar 2024 15:28:28 +0100
Subject: [PATCH 04/10] Fix formatting of early stopping min delta test

---
 tests/python_package_test/test_engine.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 84c00ba0feac..6f31e276dd3e 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -938,27 +938,23 @@ def test_early_stopping_via_global_params(first_metric_only):
     assert "error" in gbm.best_score[valid_set_name]
 
 
-@pytest.mark.parametrize('early_stopping_min_delta', [1e3, 0.0])
+@pytest.mark.parametrize("early_stopping_min_delta", [1e3, 0.0])
 def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
     X, y = load_breast_cancer(return_X_y=True)
     num_trees = 5
     params = {
-        'num_trees': num_trees,
-        'objective': 'binary',
-        'metric': 'None',
-        'verbose': -1,
-        'early_stopping_round': 2,
-        'early_stopping_min_delta': early_stopping_min_delta,
+        "num_trees": num_trees,
+        "objective": "binary",
+        "metric": "None",
+        "verbose": -1,
+        "early_stopping_round": 2,
+        "early_stopping_min_delta": early_stopping_min_delta,
     }
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-    valid_set_name = 'valid_set'
-    gbm = lgb.train(params,
-                    lgb_train,
-                    feval=decreasing_metric,
-                    valid_sets=lgb_eval,
-                    valid_names=valid_set_name)
+    valid_set_name = "valid_set"
+    gbm = lgb.train(params, lgb_train, feval=decreasing_metric, valid_sets=lgb_eval, valid_names=valid_set_name)
     if early_stopping_min_delta == 0:
         assert gbm.best_iteration == num_trees
     else:

From a21c9e325df62f388d7493c1b49377481879b152 Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Tue, 23 Apr 2024 01:42:16 +0200
Subject: [PATCH 05/10] Add early_stopping_min_delta to cpp config

---
 docs/Parameters.rst                      | 4 ++++
 include/LightGBM/config.h                | 4 ++++
 src/io/config_auto.cpp                   | 7 +++++++
 tests/python_package_test/test_engine.py | 1 +
 4 files changed, 16 insertions(+)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 341cdd487c71..d4a544be4682 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -402,6 +402,10 @@ Learning Control Parameters
 
    -  can be used to speed up training
 
+-  ``early_stopping_min_delta`` :raw-html:`<a id="early_stopping_min_delta" title="Permalink to this parameter" href="#early_stopping_min_delta">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, constraints: ``early_stopping_min_delta >= 0.0``
+
+   -  when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
+
 -  ``first_metric_only`` :raw-html:`<a id="first_metric_only" title="Permalink to this parameter" href="#first_metric_only">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
 
    -  LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 6500cb77272d..6b240e18e5d8 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -386,6 +386,10 @@ struct Config {
   // desc = can be used to speed up training
   int early_stopping_round = 0;
 
+  // check = >=0.0
+  // desc = when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
+  double early_stopping_min_delta = 0.0;
+
   // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
   bool first_metric_only = false;
 
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 394614af3f33..ca4fda1c3d4c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -214,6 +214,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "extra_trees",
   "extra_seed",
   "early_stopping_round",
+  "early_stopping_min_delta",
   "first_metric_only",
   "max_delta_step",
   "lambda_l1",
@@ -392,6 +393,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
 
   GetInt(params, "early_stopping_round", &early_stopping_round);
 
+  GetDouble(params, "early_stopping_min_delta", &early_stopping_min_delta);
+  CHECK_GE(early_stopping_min_delta, 0.0);
+
   GetBool(params, "first_metric_only", &first_metric_only);
 
   GetDouble(params, "max_delta_step", &max_delta_step);
@@ -690,6 +694,7 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[extra_trees: " << extra_trees << "]\n";
   str_buf << "[extra_seed: " << extra_seed << "]\n";
   str_buf << "[early_stopping_round: " << early_stopping_round << "]\n";
+  str_buf << "[early_stopping_min_delta: " << early_stopping_min_delta << "]\n";
   str_buf << "[first_metric_only: " << first_metric_only << "]\n";
   str_buf << "[max_delta_step: " << max_delta_step << "]\n";
   str_buf << "[lambda_l1: " << lambda_l1 << "]\n";
@@ -814,6 +819,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
     {"extra_trees", {"extra_tree"}},
     {"extra_seed", {}},
     {"early_stopping_round", {"early_stopping_rounds", "early_stopping", "n_iter_no_change"}},
+    {"early_stopping_min_delta", {}},
     {"first_metric_only", {}},
     {"max_delta_step", {"max_tree_output", "max_leaf_output"}},
     {"lambda_l1", {"reg_alpha", "l1_regularization"}},
@@ -957,6 +963,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
     {"extra_trees", "bool"},
     {"extra_seed", "int"},
     {"early_stopping_round", "int"},
+    {"early_stopping_min_delta", "double"},
     {"first_metric_only", "bool"},
     {"max_delta_step", "double"},
     {"lambda_l1", "double"},
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 6f31e276dd3e..180990a77825 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -959,6 +959,7 @@ def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
         assert gbm.best_iteration == num_trees
     else:
         assert gbm.best_iteration == 1
+    assert f"[early_stopping_min_delta: {early_stopping_min_delta:.0f}]" in gbm.model_to_string()
 
 
 @pytest.mark.parametrize("first_only", [True, False])

From 95b6ad783b600c5b1170a28d9810a20a99bd20ee Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Tue, 23 Apr 2024 14:45:56 +0200
Subject: [PATCH 06/10] Apply early_stopping_min_delta in GBDT and adjust test

---
 src/boosting/gbdt.cpp                    | 4 +++-
 src/boosting/gbdt.h                      | 2 ++
 tests/python_package_test/test_engine.py | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 5be3b9765bc4..86a8a5a3ca65 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -30,6 +30,7 @@ GBDT::GBDT()
       config_(nullptr),
       objective_function_(nullptr),
       early_stopping_round_(0),
+      early_stopping_min_delta_(0.0),
       es_first_metric_only_(false),
       max_feature_idx_(0),
       num_tree_per_iteration_(1),
@@ -65,6 +66,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   num_class_ = config->num_class;
   config_ = std::unique_ptr<Config>(new Config(*config));
   early_stopping_round_ = config_->early_stopping_round;
+  early_stopping_min_delta_ = config->early_stopping_min_delta;
   es_first_metric_only_ = config_->first_metric_only;
   shrinkage_rate_ = config_->learning_rate;
 
@@ -576,7 +578,7 @@ std::string GBDT::OutputMetric(int iter) {
         if (es_first_metric_only_ && j > 0) { continue; }
         if (ret.empty() && early_stopping_round_ > 0) {
           auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
-          if (cur_score > best_score_[i][j]) {
+          if (cur_score - best_score_[i][j] > early_stopping_min_delta_) {
             best_score_[i][j] = cur_score;
             best_iter_[i][j] = iter;
             meet_early_stopping_pairs.emplace_back(i, j);
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 28ebee446fad..4557830fa863 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -532,6 +532,8 @@ class GBDT : public GBDTBase {
   std::vector<std::vector<const Metric*>> valid_metrics_;
   /*! \brief Number of rounds for early stopping */
   int early_stopping_round_;
+  /*! \brief Minimum improvement for early stopping */
+  double early_stopping_min_delta_;
   /*! \brief Only use first metric for early stopping */
   bool es_first_metric_only_;
   /*! \brief Best iteration(s) for early stopping */
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 887f547c036c..be53b6810e56 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1088,7 +1088,6 @@ def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
         assert gbm.best_iteration == num_trees
     else:
         assert gbm.best_iteration == 1
-    assert f"[early_stopping_min_delta: {early_stopping_min_delta:.0f}]" in gbm.model_to_string()
 
 
 def test_early_stopping_can_be_triggered_via_custom_callback():
@@ -1580,6 +1579,7 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path):
         "[extra_trees: 0]",
         "[extra_seed: 6642]",
         "[early_stopping_round: 0]",
+        "[early_stopping_min_delta: 0]",
         "[first_metric_only: 0]",
         "[max_delta_step: 0]",
         "[lambda_l1: 0]",

From fe02801f8bde61fa8dcb59315f1b2bf956c23a4e Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Tue, 23 Apr 2024 14:49:36 +0200
Subject: [PATCH 07/10] Adjust R test for early_stopping_min_delta in model string

---
 R-package/tests/testthat/test_lgb.Booster.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index e6b0e8abda64..7bf0a1bf43d2 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -850,6 +850,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
         , "[extra_trees: 0]"
         , "[extra_seed: 6642]"
         , "[early_stopping_round: 0]"
+        , "[early_stopping_min_delta: 0]"
         , "[first_metric_only: 0]"
         , "[max_delta_step: 0]"
         , "[lambda_l1: 0]"

From 3e5bf81dad636402cabd5dcfe8004cff5cfe30b0 Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Thu, 25 Apr 2024 10:23:20 +0200
Subject: [PATCH 08/10] Debug: list open-mpi Cellar contents in macOS CI setup

---
 .ci/setup.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.ci/setup.sh b/.ci/setup.sh
index ec9e3f114655..195e9e6ea2ee 100755
--- a/.ci/setup.sh
+++ b/.ci/setup.sh
@@ -24,6 +24,7 @@ if [[ $OS_NAME == "macos" ]]; then
     fi
     if [[ $TASK == "mpi" ]]; then
         brew install open-mpi
+        tree /usr/local/Cellar/open-mpi
     fi
     if [[ $TASK == "swig" ]]; then
         brew install swig

From 6a626770993a18b1e310bb7e581080cc39c78724 Mon Sep 17 00:00:00 2001
From: Oliver Borchert <oliver.borchert@quantco.com>
Date: Thu, 25 Apr 2024 10:33:16 +0200
Subject: [PATCH 09/10] Revert debug listing of open-mpi Cellar contents

---
 .ci/setup.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.ci/setup.sh b/.ci/setup.sh
index 195e9e6ea2ee..ec9e3f114655 100755
--- a/.ci/setup.sh
+++ b/.ci/setup.sh
@@ -24,7 +24,6 @@ if [[ $OS_NAME == "macos" ]]; then
     fi
     if [[ $TASK == "mpi" ]]; then
         brew install open-mpi
-        tree /usr/local/Cellar/open-mpi
     fi
     if [[ $TASK == "swig" ]]; then
         brew install swig

From 92ff59e928ed11a52da0f96182584ddbfa523587 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 29 Apr 2024 10:38:12 -0500
Subject: [PATCH 10/10] Apply suggestions from code review

---
 tests/python_package_test/test_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index be53b6810e56..29210b94b4a1 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1073,6 +1073,7 @@ def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
     num_trees = 5
     params = {
         "num_trees": num_trees,
+        "num_leaves": 5,
         "objective": "binary",
         "metric": "None",
         "verbose": -1,
@@ -1082,8 +1083,7 @@ def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-    valid_set_name = "valid_set"
-    gbm = lgb.train(params, lgb_train, feval=decreasing_metric, valid_sets=lgb_eval, valid_names=valid_set_name)
+    gbm = lgb.train(params, lgb_train, feval=decreasing_metric, valid_sets=lgb_eval)
     if early_stopping_min_delta == 0:
         assert gbm.best_iteration == num_trees
     else: