Skip to content

Commit

Permalink
Added MASE metric and y_train parameter to objectives (#4221)
Browse files Browse the repository at this point in the history
* add MASE metric

* add MASE tests

* increase count and define y_train

* add MASE and SMAPE to API reference

* remove positive_only function from MAPE/SMAPE/MASE

* remove pd->np and metrics from non neg tests

* remove MAPE from non negative tests

* add y_train parameter

* check for 0 values

* swap np array to pd series
  • Loading branch information
remyogasawara authored Jul 18, 2023
1 parent 4d20d58 commit 09bd86e
Show file tree
Hide file tree
Showing 14 changed files with 436 additions and 79 deletions.
2 changes: 2 additions & 0 deletions docs/source/api_index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ Regression Objectives

evalml.objectives.ExpVariance
evalml.objectives.MAE
evalml.objectives.MASE
evalml.objectives.MAPE
evalml.objectives.SMAPE
evalml.objectives.MSE
evalml.objectives.MeanSquaredLogError
evalml.objectives.MedianAE
Expand Down
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Release Notes
* Enhancements
* Add run_feature_selection to AutoMLSearch and Default Algorithm :pr:`4210`
* Added ``SMAPE`` to the standard metrics for time series problems :pr:`4220`
* Added ``MASE`` metric and ``y_train`` parameter to objectives :pr:`4221`
* Fixes
* `IDColumnsDataCheck` now works with Unknown data type :pr:`4203`
* Changes
Expand Down
1 change: 1 addition & 0 deletions evalml/objectives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
AUC,
F1,
MAE,
MASE,
MAPE,
SMAPE,
MSE,
Expand Down
10 changes: 9 additions & 1 deletion evalml/objectives/cost_benefit_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,20 @@ def __init__(self, true_positive, true_negative, false_positive, false_negative)
self.false_positive = false_positive
self.false_negative = false_negative

def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
def objective_function(
self,
y_true,
y_predicted,
y_train=None,
X=None,
sample_weight=None,
):
"""Calculates the cost-benefit of using the predicted and true values.
Args:
y_predicted (pd.Series): Predicted labels.
y_true (pd.Series): True labels.
y_train (pd.Series): Ignored.
X (pd.DataFrame): Ignored.
sample_weight (pd.DataFrame): Ignored.
Expand Down
10 changes: 9 additions & 1 deletion evalml/objectives/fraud_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,20 @@ def __init__(
self.fraud_payout_percentage = fraud_payout_percentage
self.amount_col = amount_col

def objective_function(self, y_true, y_predicted, X, sample_weight=None):
def objective_function(
self,
y_true,
y_predicted,
X,
y_train=None,
sample_weight=None,
):
"""Calculate amount lost to fraud per transaction given predictions, true values, and dataframe with transaction amount.
Args:
y_predicted (pd.Series): Predicted fraud labels.
y_true (pd.Series): True fraud labels.
y_train (pd.Series): Ignored.
X (pd.DataFrame): Data with transaction amounts.
sample_weight (pd.DataFrame): Ignored.
Expand Down
14 changes: 11 additions & 3 deletions evalml/objectives/lead_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,20 @@ def __init__(self, true_positives=1, false_positives=-1):
self.true_positives = true_positives
self.false_positives = false_positives

def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
def objective_function(
self,
y_true,
y_predicted,
y_train=None,
X=None,
sample_weight=None,
):
"""Calculate the profit per lead.
Args:
y_predicted (pd.Series): Predicted labels
y_true (pd.Series): True labels
y_predicted (pd.Series): Predicted labels.
y_true (pd.Series): True labels.
y_train (pd.Series): Ignored.
X (pd.DataFrame): Ignored.
sample_weight (pd.DataFrame): Ignored.
Expand Down
16 changes: 14 additions & 2 deletions evalml/objectives/objective_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,20 @@ def expected_range(cls):

@classmethod
@abstractmethod
def objective_function(cls, y_true, y_predicted, X=None, sample_weight=None):
def objective_function(
cls,
y_true,
y_predicted,
y_train=None,
X=None,
sample_weight=None,
):
"""Computes the relative value of the provided predictions compared to the actual labels, according to a specified metric.
Args:
y_predicted (pd.Series): Predicted values of length [n_samples]
y_true (pd.Series): Actual class labels of length [n_samples]
y_train (pd.Series): Observed training values of length [n_samples]
X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result
Expand All @@ -79,12 +87,13 @@ def positive_only(cls):
"""If True, this objective is only valid for positive data. Defaults to False."""
return False

def score(self, y_true, y_predicted, X=None, sample_weight=None):
def score(self, y_true, y_predicted, y_train=None, X=None, sample_weight=None):
"""Returns a numerical score indicating performance based on the differences between the predicted and actual values.
Args:
y_predicted (pd.Series): Predicted values of length [n_samples]
y_true (pd.Series): Actual class labels of length [n_samples]
y_train (pd.Series): Observed training values of length [n_samples]
X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result
Expand All @@ -93,12 +102,15 @@ def score(self, y_true, y_predicted, X=None, sample_weight=None):
"""
if X is not None:
X = self._standardize_input_type(X)
if y_train is not None:
y_train = self._standardize_input_type(y_train)
y_true = self._standardize_input_type(y_true)
y_predicted = self._standardize_input_type(y_predicted)
self.validate_inputs(y_true, y_predicted)
return self.objective_function(
y_true,
y_predicted,
y_train=y_train,
X=X,
sample_weight=sample_weight,
)
Expand Down
Loading

0 comments on commit 09bd86e

Please sign in to comment.