etna-team · d-a-bunin · Feb 3, 2025 · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,9 +36,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 - Fix possibility of silent handling of duplicate features when updating dataset with `TSDataset.update_columns_from_pandas` ([#522](https://github.com/etna-team/etna/pull/552))
-- 
-- 
-- 
+- **Breaking:** Rename `TSDataset.index` to `TSDataset.timestamps`; the property now returns a copy of the underlying index ([#593](https://github.com/etna-team/etna/pull/593))
+- **Breaking:** Rename `TSDataset.add_columns_from_pandas` to `TSDataset.add_features_from_pandas` ([#593](https://github.com/etna-team/etna/pull/593))
+- **Breaking:** Rename `TSDataset.update_columns_from_pandas` to `TSDataset.update_features_from_pandas` ([#593](https://github.com/etna-team/etna/pull/593))
 - 
 - 
 - 

diff --git a/etna/analysis/eda/plots.py b/etna/analysis/eda/plots.py
@@ -333,7 +333,7 @@
     if segments is None:
         segments = sorted(ts.segments)
 
-    holidays_df = _create_holidays_df(holidays, index=ts.index, as_is=as_is)
+    holidays_df = _create_holidays_df(holidays, index=ts.timestamps, as_is=as_is)
 
     _, ax = _prepare_axes(num_plots=len(segments), columns_num=columns_num, figsize=figsize)
 
@@ -633,7 +633,7 @@
           default value is "1M"
 
         * integer for data with integer timestamp, groups are formed by ``timestamp // freq``,
-          default value is ``ts.index.max() + 1``
+          default value is ``ts.timestamps.max() + 1``
 
     n_rows:
         maximum number of rows to plot
@@ -657,7 +657,7 @@
     if ts.freq is None:
         # make only one group
         if freq is None:
-            freq = ts.index.max() + 1
+            freq = ts.timestamps.max() + 1
         grouped_data = df_full.groupby(df_full.timestamp // freq)
     else:
         if freq is None:

diff --git a/etna/clustering/distances/euclidean_distance.py b/etna/clustering/distances/euclidean_distance.py
@@ -59,7 +59,7 @@ def _get_average(self, ts: "TSDataset") -> pd.DataFrame:
         pd.DataFrame:
             dataframe with columns "timestamp" and "target" that contains the series
         """
-        centroid = pd.DataFrame({"timestamp": ts.index.values, "target": ts.df.mean(axis=1).values})
+        centroid = pd.DataFrame({"timestamp": ts.timestamps.values, "target": ts.df.mean(axis=1).values})
         return centroid
 
 

diff --git a/etna/commands/forecast_command.py b/etna/commands/forecast_command.py
@@ -31,7 +31,7 @@ def compute_horizon(horizon: int, forecast_params: Dict[str, Any], tsdataset: TS
         forecast_start_timestamp = _check_timestamp_param(
             param=forecast_params["start_timestamp"], param_name="start_timestamp", freq=tsdataset.freq
         )
-        train_end_timestamp = tsdataset.index.max()
+        train_end_timestamp = tsdataset.timestamps.max()
 
         if forecast_start_timestamp <= train_end_timestamp:
             raise ValueError("Parameter `start_timestamp` should greater than end of training dataset!")

diff --git a/etna/commands/utils.py b/etna/commands/utils.py
@@ -49,7 +49,7 @@ def _max_n_folds_forecast(pipeline: Pipeline, context_size: int, ts: Optional[TS
         else:
             ts = pipeline.ts
 
-    num_points = len(ts.index)
+    num_points = len(ts.timestamps)
     horizon = pipeline.horizon
 
     return _estimate_n_folds(num_points=num_points, horizon=horizon, stride=horizon, context_size=context_size)
@@ -65,7 +65,7 @@ def _max_n_folds_backtest(pipeline: Pipeline, context_size: int, ts: TSDataset,
     if backtest_with_intervals:
         raise NotImplementedError("Number of folds estimation for backtest with intervals is not implemented!")
 
-    num_points = len(ts.index)
+    num_points = len(ts.timestamps)
 
     horizon = pipeline.horizon
     stride = method_kwargs.get("stride", horizon)

diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py
@@ -452,7 +452,7 @@ def make_future(
             # check if we have enough values in regressors
             # TODO: check performance
             if self.regressors:
-                future_index = df.index.difference(self.index)
+                future_index = df.index.difference(self.timestamps)
                 for segment in self.segments:
                     regressors_index = self.df_exog.loc[:, pd.IndexSlice[segment, self.regressors]].index
                     if not np.all(future_index.isin(regressors_index)):
@@ -1258,7 +1258,7 @@ def train_test_split(
 
         return train, test
 
-    def update_columns_from_pandas(self, df_update: pd.DataFrame):
+    def update_features_from_pandas(self, df_update: pd.DataFrame):
         """Update the existing columns in the dataset with the new values from pandas dataframe.
 
         Before updating columns in ``df``, columns of ``df_update`` will be cropped by the last timestamp in ``df``.
@@ -1298,7 +1298,7 @@ def update_columns_from_pandas(self, df_update: pd.DataFrame):
 
         self.df.iloc[:, column_idx] = df
 
-    def add_columns_from_pandas(
+    def add_features_from_pandas(
         self, df_update: pd.DataFrame, update_exog: bool = False, regressors: Optional[List[str]] = None
     ):
         """Update the dataset with the new columns from pandas dataframe.
@@ -1374,15 +1374,15 @@ def drop_features(self, features: List[str], drop_from_exog: bool = False):
         self._regressors = list(set(self._regressors) - features_set)
 
     @property
-    def index(self) -> pd.Index:
+    def timestamps(self) -> pd.Index:
         """Return TSDataset timestamp index.
 
         Returns
         -------
         :
             timestamp index of TSDataset
         """
-        return self.df.index
+        return self.df.index.copy()
 
     def level_names(self) -> Optional[List[str]]:
         """Return names of the levels in the hierarchical structure."""
@@ -1911,4 +1911,4 @@ def size(self) -> Tuple[int, int, Optional[int]]:
         :
             Tuple of TSDataset sizes
         """
-        return len(self.index), len(self.segments), len(self.features)
+        return len(self.timestamps), len(self.segments), len(self.features)
diff --git a/etna/metrics/base.py b/etna/metrics/base.py
@@ -253,7 +253,7 @@ def _validate_index(y_true: TSDataset, y_pred: TSDataset):
         ValueError:
             If there are mismatches in ``y_true`` and ``y_pred`` timestamps
         """
-        if not y_true.index.equals(y_pred.index):
+        if not y_true.timestamps.equals(y_pred.timestamps):
             raise ValueError("y_true and y_pred have different timestamps")
 
     def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset):

diff --git a/etna/models/base.py b/etna/models/base.py
@@ -670,7 +670,7 @@ def forecast(self, ts: "TSDataset", prediction_size: int, return_components: boo
             raise NotImplementedError("This mode isn't currently implemented!")
 
         expected_length = prediction_size + self.encoder_length
-        if len(ts.index) < expected_length:
+        if len(ts.timestamps) < expected_length:
             raise ValueError(
                 "Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataset!"
             )
@@ -682,7 +682,7 @@ def forecast(self, ts: "TSDataset", prediction_size: int, return_components: boo
             dropna=False,
         )
         predictions = self.raw_predict(test_dataset)
-        end_idx = len(ts.index)
+        end_idx = len(ts.timestamps)
         future_ts = ts.tsdataset_idx_slice(start_idx=end_idx - prediction_size, end_idx=end_idx)
         for (segment, feature_nm), value in predictions.items():
             # we don't want to change dtype after assignment, but there can happen cast to float32

diff --git a/etna/models/nn/chronos/base.py b/etna/models/nn/chronos/base.py
@@ -196,7 +196,7 @@ def _forecast(
         if return_components:
             raise NotImplementedError("This mode isn't currently implemented!")
 
-        max_context_size = len(ts.index) - prediction_size
+        max_context_size = len(ts.timestamps) - prediction_size
         if max_context_size <= 0:
             raise ValueError("Dataset doesn't have any context timestamps.")
 
@@ -220,7 +220,7 @@ def _forecast(
                 **predict_kwargs,
             )  # shape [n_segments, prediction_length, n_quantiles], [n_segments, prediction_length]
 
-        end_idx = len(ts.index)
+        end_idx = len(ts.timestamps)
         future_ts = ts.tsdataset_idx_slice(start_idx=end_idx - prediction_size, end_idx=end_idx)
 
         if prediction_interval:

diff --git a/etna/models/nn/timesfm.py b/etna/models/nn/timesfm.py
@@ -226,7 +226,7 @@ def forecast(
         if return_components:
             raise NotImplementedError("This mode isn't currently implemented!")
 
-        max_context_size = len(ts.index) - prediction_size
+        max_context_size = len(ts.timestamps) - prediction_size
         if max_context_size <= 0:
             raise ValueError("Dataset doesn't have any context timestamps.")
 
@@ -235,15 +235,15 @@ def forecast(
 
         self.tfm._set_horizon(prediction_size)
 
-        end_idx = len(ts.index)
+        end_idx = len(ts.timestamps)
 
         all_exog = self._exog_columns()
         df_slice = ts.df.loc[:, pd.IndexSlice[:, all_exog + ["target"]]]
         first_valid_index = (
             df_slice.isna().any(axis=1).idxmin()
         )  # If all timestamps contains NaNs, idxmin() returns the first timestamp
 
-        target_df = df_slice.loc[first_valid_index : ts.index[-prediction_size - 1], pd.IndexSlice[:, "target"]]
+        target_df = df_slice.loc[first_valid_index : ts.timestamps[-prediction_size - 1], pd.IndexSlice[:, "target"]]
 
         nan_segment_mask = target_df.isna().any()
         if nan_segment_mask.any():

diff --git a/etna/pipeline/autoregressive_pipeline.py b/etna/pipeline/autoregressive_pipeline.py
@@ -138,7 +138,7 @@ def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset:
         target_components_dfs = []
         for idx_start in range(0, self.horizon, self.step):
             current_step = min(self.step, self.horizon - idx_start)
-            current_idx_border = ts.index.shape[0] + idx_start
+            current_idx_border = ts.timestamps.shape[0] + idx_start
             current_ts = TSDataset(
                 df=prediction_df.iloc[:current_idx_border],
                 freq=ts.freq,

diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
@@ -188,7 +188,7 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
         ValueError:
             Last target timestamp should be not later than horizon steps after last train timestamp
         """
-        timestamps = ts.index.to_list()
+        timestamps = ts.timestamps.to_list()
 
         if self.first_train_timestamp is not None and self.first_train_timestamp not in timestamps:
             raise ValueError("First train timestamp isn't present in a given dataset!")
@@ -559,7 +559,7 @@ def _make_predict_timestamps(
         end_timestamp = _check_timestamp_param(param=end_timestamp, param_name="end_timestamp", freq=ts.freq)
 
         min_timestamp = ts.describe()["start_timestamp"].max()
-        max_timestamp = ts.index[-1]
+        max_timestamp = ts.timestamps[-1]
 
         if start_timestamp is None:
             start_timestamp = min_timestamp
@@ -719,7 +719,7 @@ def _generate_masks_from_n_folds(
             assert_never(mode)
 
         masks = []
-        dataset_timestamps = list(ts.index)
+        dataset_timestamps = list(ts.timestamps)
         min_timestamp_idx, max_timestamp_idx = 0, len(dataset_timestamps)
         for offset in range(n_folds, 0, -1):
             min_train_idx = min_timestamp_idx + (n_folds - offset) * stride * constant_history_length
@@ -756,7 +756,7 @@ def _generate_folds_datasets(
         ts: TSDataset, masks: List[FoldMask], horizon: int
     ) -> Generator[Tuple[TSDataset, TSDataset], None, None]:
         """Generate folds."""
-        timestamps = list(ts.index)
+        timestamps = list(ts.timestamps)
         for mask in masks:
             min_train_idx = timestamps.index(mask.first_train_timestamp)
             max_train_idx = timestamps.index(mask.last_train_timestamp)
@@ -824,10 +824,10 @@ def _process_fold_forecast(
             logger.start_experiment(job_type="crossval", group=str(fold_number))
 
             fold: Dict[str, Any] = {}
-            for stage_name, stage_df in zip(("train", "test"), (train, test)):
+            for stage_name, stage_ts in zip(("train", "test"), (train, test)):
                 fold[f"{stage_name}_timerange"] = {}
-                fold[f"{stage_name}_timerange"]["start"] = stage_df.index.min()
-                fold[f"{stage_name}_timerange"]["end"] = stage_df.index.max()
+                fold[f"{stage_name}_timerange"]["start"] = stage_ts.timestamps.min()
+                fold[f"{stage_name}_timerange"]["end"] = stage_ts.timestamps.max()
 
             forecast.df = forecast.df.loc[mask.target_timestamps]
             test.df = test.df.loc[mask.target_timestamps]
@@ -906,7 +906,7 @@ def _prepare_fold_masks(
                 ts=ts, n_folds=masks, horizon=self.horizon, mode=mode, stride=stride
             )
         for i, mask in enumerate(masks):
-            mask.first_train_timestamp = mask.first_train_timestamp if mask.first_train_timestamp else ts.index[0]
+            mask.first_train_timestamp = mask.first_train_timestamp if mask.first_train_timestamp else ts.timestamps[0]
             masks[i] = mask
         for mask in masks:
             mask.validate_on_dataset(ts=ts, horizon=self.horizon)

diff --git a/etna/pipeline/mixins.py b/etna/pipeline/mixins.py
@@ -64,7 +64,7 @@ def _create_ts(
     def _determine_prediction_size(
         self, ts: TSDataset, start_timestamp: Union[pd.Timestamp, int], end_timestamp: Union[pd.Timestamp, int]
     ) -> int:
-        timestamp_indices = pd.Series(np.arange(len(ts.index)), index=ts.index)
+        timestamp_indices = pd.Series(np.arange(len(ts.timestamps)), index=ts.timestamps)
         timestamps = timestamp_indices.loc[start_timestamp:end_timestamp]
         return len(timestamps)
 

diff --git a/etna/transforms/base.py b/etna/transforms/base.py
@@ -44,13 +44,13 @@ def _update_dataset(self, ts: TSDataset, columns_before: Set[str], df_transforme
             ts.drop_features(features=columns_to_remove, drop_from_exog=False)
         if len(columns_to_add) != 0:
             new_regressors = self.get_regressors_info()
-            ts.add_columns_from_pandas(
+            ts.add_features_from_pandas(
                 df_update=df_transformed.loc[pd.IndexSlice[:], pd.IndexSlice[:, columns_to_add]],
                 update_exog=False,
                 regressors=new_regressors,
             )
         if len(columns_to_update) != 0:
-            ts.update_columns_from_pandas(
+            ts.update_features_from_pandas(
                 df_update=df_transformed.loc[pd.IndexSlice[:], pd.IndexSlice[:, columns_to_update]]
             )
         return ts

diff --git a/etna/transforms/decomposition/dft_based.py b/etna/transforms/decomposition/dft_based.py
@@ -136,8 +136,8 @@ def fit(self, ts: TSDataset) -> "FourierDecomposeTransform":
         :
             the fitted transform instance.
         """
-        self._first_timestamp = ts.index.min()
-        self._last_timestamp = ts.index.max()
+        self._first_timestamp = ts.timestamps.min()
+        self._last_timestamp = ts.timestamps.max()
 
         self._check_segments(df=ts[..., self.in_column].droplevel("feature", axis=1))
 
@@ -159,19 +159,19 @@ def transform(self, ts: TSDataset) -> TSDataset:
         if self._first_timestamp is None:
             raise ValueError("Transform is not fitted!")
 
-        if ts.index.min() < self._first_timestamp:
+        if ts.timestamps.min() < self._first_timestamp:
             raise ValueError(
                 f"First index of the dataset to be transformed must be larger or equal than {self._first_timestamp}!"
             )
 
-        if ts.index.min() > self._last_timestamp:
+        if ts.timestamps.min() > self._last_timestamp:
             raise ValueError(
                 f"Dataset to be transformed must contain historical observations in range {self._first_timestamp} - {self._last_timestamp}"
             )
 
         segment_df = ts[..., self.in_column].droplevel("feature", axis=1)
 
-        ts_max_timestamp = ts.index.max()
+        ts_max_timestamp = ts.timestamps.max()
         if ts_max_timestamp > self._last_timestamp:
             future_steps = determine_num_steps(self._last_timestamp, ts_max_timestamp, freq=ts.freq)
             segment_df.iloc[-future_steps:] = np.nan
@@ -192,7 +192,7 @@ def transform(self, ts: TSDataset) -> TSDataset:
 
         segment_components = pd.concat(segment_components, axis=1)
 
-        ts.add_columns_from_pandas(segment_components)
+        ts.add_features_from_pandas(segment_components)
 
         return ts
 

diff --git a/etna/transforms/decomposition/model_based.py b/etna/transforms/decomposition/model_based.py
@@ -125,8 +125,8 @@ def fit(self, ts: TSDataset) -> "ModelDecomposeTransform":
         :
             the fitted transform instance.
         """
-        self._first_timestamp = ts.index.min()
-        self._last_timestamp = ts.index.max()
+        self._first_timestamp = ts.timestamps.min()
+        self._last_timestamp = ts.timestamps.max()
 
         ts = self._prepare_ts(ts=ts)
 
@@ -149,20 +149,20 @@ def transform(self, ts: TSDataset) -> TSDataset:
         if self._first_timestamp is None:
             raise ValueError("Transform is not fitted!")
 
-        if ts.index.min() < self._first_timestamp:
+        if ts.timestamps.min() < self._first_timestamp:
             raise ValueError(
                 f"First index of the dataset to be transformed must be larger or equal than {self._first_timestamp}!"
             )
 
-        if ts.index.min() > self._last_timestamp:
+        if ts.timestamps.min() > self._last_timestamp:
             raise ValueError(
                 f"Dataset to be transformed must contain historical observations in range {self._first_timestamp} - {self._last_timestamp}"
             )
 
         decompose_ts = self._prepare_ts(ts=ts)
 
         future_steps = 0
-        ts_max_timestamp = decompose_ts.index.max()
+        ts_max_timestamp = decompose_ts.timestamps.max()
         if ts_max_timestamp > self._last_timestamp:
             future_steps = determine_num_steps(self._last_timestamp, ts_max_timestamp, freq=decompose_ts.freq)
             decompose_ts.df = decompose_ts.df.loc[: self._last_timestamp]

diff --git a/examples/102-backtest.ipynb b/examples/102-backtest.ipynb
@@ -1254,7 +1254,7 @@
    "source": [
     "# 2 With specific mask\n",
     "window_size = 85\n",
-    "first_train_timestamp = ts.index.min() + np.timedelta64(100, \"D\")\n",
+    "first_train_timestamp = ts.timestamps.min() + np.timedelta64(100, \"D\")\n",
     "last_train_timestamp = first_train_timestamp + np.timedelta64(window_size, \"D\")\n",
     "target_timestamps = pd.date_range(start=last_train_timestamp + np.timedelta64(1, \"D\"), periods=horizon)\n",
     "mask = FoldMask(\n",
@@ -1381,7 +1381,7 @@
     "def sliding_window_masks(window_size, n_folds):\n",
     "    masks = []\n",
     "    for n in range(n_folds):\n",
-    "        first_train_timestamp = ts.index.min() + np.timedelta64(100, \"D\") + np.timedelta64(n, \"D\")\n",
+    "        first_train_timestamp = ts.timestamps.min() + np.timedelta64(100, \"D\") + np.timedelta64(n, \"D\")\n",
     "        last_train_timestamp = first_train_timestamp + np.timedelta64(window_size, \"D\")\n",
     "        target_timestamps = pd.date_range(start=last_train_timestamp + np.timedelta64(1, \"D\"), periods=horizon)\n",
     "        mask = FoldMask(\n",

diff --git a/examples/206-clustering.ipynb b/examples/206-clustering.ipynb
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -444,7 +444,7 @@ def const_ts_anomal() -> TSDataset:
 @pytest.fixture
 def ts_diff_endings(example_reg_tsds):
     ts = deepcopy(example_reg_tsds)
-    ts.loc[ts.index[-5] :, pd.IndexSlice["segment_1", "target"]] = np.NAN
+    ts.loc[ts.timestamps[-5] :, pd.IndexSlice["segment_1", "target"]] = np.NAN
     return ts