Skip to content

Commit

Permalink
feat(auto): support input_size (#451)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Nov 20, 2024
1 parent 59f6cc5 commit 151f9e8
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 8 deletions.
4 changes: 4 additions & 0 deletions mlforecast/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def fit(
h: int,
num_samples: int,
step_size: Optional[int] = None,
input_size: Optional[int] = None,
refit: Union[bool, int] = False,
loss: Optional[Callable[[DataFrame, DataFrame], float]] = None,
id_col: str = "unique_id",
Expand All @@ -470,6 +471,8 @@ def fit(
Number of trials to run
step_size : int, optional (default=None)
Step size between each cross validation window. If None it will be equal to `h`.
input_size : int, optional (default=None)
Maximum training samples per serie in each window. If None, will use an expanding window.
refit : bool or int (default=False)
Retrain model for each cross validation window.
If False, the models are trained at the beginning and then used to predict each window.
Expand Down Expand Up @@ -550,6 +553,7 @@ def config_fn(trial: optuna.Trial) -> Dict[str, Any]:
n_windows=n_windows,
h=h,
step_size=step_size,
input_size=input_size,
refit=refit,
id_col=id_col,
time_col=time_col,
Expand Down
4 changes: 4 additions & 0 deletions mlforecast/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def mlforecast_objective(
n_windows: int,
h: int,
step_size: Optional[int] = None,
input_size: Optional[int] = None,
refit: Union[bool, int] = False,
id_col: str = "unique_id",
time_col: str = "ds",
Expand All @@ -56,6 +57,8 @@ def mlforecast_objective(
Forecast horizon.
step_size : int, optional (default=None)
Step size between each cross validation window. If None it will be equal to `h`.
input_size : int, optional (default=None)
Maximum training samples per serie in each window. If None, will use an expanding window.
refit : bool or int (default=False)
Retrain model for each cross validation window.
If False, the models are trained at the beginning and then used to predict each window.
Expand Down Expand Up @@ -86,6 +89,7 @@ def objective(trial: optuna.Trial) -> float:
time_col=time_col,
freq=freq,
step_size=step_size,
input_size=input_size,
)
model_copy = clone(model)
model_params = config["model_params"]
Expand Down
49 changes: 43 additions & 6 deletions nbs/auto.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@
" h: int,\n",
" num_samples: int,\n",
" step_size: Optional[int] = None,\n",
" input_size: Optional[int] = None,\n",
" refit: Union[bool, int] = False,\n",
" loss: Optional[Callable[[DataFrame, DataFrame], float]] = None,\n",
" id_col: str = 'unique_id',\n",
Expand All @@ -548,6 +549,8 @@
" Number of trials to run\n",
" step_size : int, optional (default=None)\n",
" Step size between each cross validation window. If None it will be equal to `h`.\n",
" input_size : int, optional (default=None)\n",
" Maximum training samples per serie in each window. If None, will use an expanding window.\n",
" refit : bool or int (default=False)\n",
" Retrain model for each cross validation window.\n",
" If False, the models are trained at the beginning and then used to predict each window.\n",
Expand Down Expand Up @@ -625,6 +628,7 @@
" n_windows=n_windows,\n",
" h=h,\n",
" step_size=step_size,\n",
" input_size=input_size,\n",
" refit=refit,\n",
" id_col=id_col,\n",
" time_col=time_col,\n",
Expand Down Expand Up @@ -818,6 +822,7 @@
"> (df:Union[pandas.core.frame.DataFrame,polars.datafram\n",
"> e.frame.DataFrame], n_windows:int, h:int,\n",
"> num_samples:int, step_size:Optional[int]=None,\n",
"> input_size:Optional[int]=None,\n",
"> refit:Union[bool,int]=False, loss:Optional[Callable[[\n",
"> Union[pandas.core.frame.DataFrame,polars.dataframe.fr\n",
"> ame.DataFrame],Union[pandas.core.frame.DataFrame,pola\n",
Expand All @@ -839,6 +844,7 @@
"| h | int | | Forecast horizon. |\n",
"| num_samples | int | | Number of trials to run |\n",
"| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n",
"| input_size | Optional | None | Maximum training samples per serie in each window. If None, will use an expanding window. |\n",
"| refit | Union | False | Retrain model for each cross validation window.<br>If False, the models are trained at the beginning and then used to predict each window.<br>If positive int, the models are retrained every `refit` windows. |\n",
"| loss | Optional | None | Function that takes the validation and train dataframes and produces a float.<br>If `None` will use the average SMAPE across series. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
Expand All @@ -861,6 +867,7 @@
"> (df:Union[pandas.core.frame.DataFrame,polars.datafram\n",
"> e.frame.DataFrame], n_windows:int, h:int,\n",
"> num_samples:int, step_size:Optional[int]=None,\n",
"> input_size:Optional[int]=None,\n",
"> refit:Union[bool,int]=False, loss:Optional[Callable[[\n",
"> Union[pandas.core.frame.DataFrame,polars.dataframe.fr\n",
"> ame.DataFrame],Union[pandas.core.frame.DataFrame,pola\n",
Expand All @@ -882,6 +889,7 @@
"| h | int | | Forecast horizon. |\n",
"| num_samples | int | | Number of trials to run |\n",
"| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n",
"| input_size | Optional | None | Maximum training samples per serie in each window. If None, will use an expanding window. |\n",
"| refit | Union | False | Retrain model for each cross validation window.<br>If False, the models are trained at the beginning and then used to predict each window.<br>If positive int, the models are retrained every `refit` windows. |\n",
"| loss | Optional | None | Function that takes the validation and train dataframes and produces a float.<br>If `None` will use the average SMAPE across series. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
Expand Down Expand Up @@ -914,7 +922,7 @@
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L574){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L592){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.predict\n",
"\n",
Expand All @@ -934,7 +942,7 @@
"text/plain": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L574){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L592){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.predict\n",
"\n",
Expand Down Expand Up @@ -972,7 +980,7 @@
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L606){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L624){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.save\n",
"\n",
Expand All @@ -988,7 +996,7 @@
"text/plain": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L606){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L624){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.save\n",
"\n",
Expand Down Expand Up @@ -1022,7 +1030,7 @@
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L616){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L634){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.forecast_fitted_values\n",
"\n",
Expand All @@ -1040,7 +1048,7 @@
"text/plain": [
"---\n",
"\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L616){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L634){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### AutoMLForecast.forecast_fitted_values\n",
"\n",
Expand Down Expand Up @@ -1072,6 +1080,8 @@
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import pandas as pd\n",
"from datasetsforecast.m4 import M4, M4Evaluation, M4Info\n",
"from sklearn.linear_model import Ridge\n",
Expand Down Expand Up @@ -1788,6 +1798,33 @@
" preds2.rename(columns={'id': 'unique_id', 'time': 'ds'}),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caf14191-370e-41b9-a322-2c9b0a7a5f6e",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"# using input_size\n",
"fit_kwargs = dict(\n",
" n_windows=3,\n",
" h=h,\n",
" num_samples=5,\n",
" optimize_kwargs={'timeout': 60}, \n",
")\n",
"\n",
"start = time.perf_counter()\n",
"auto_mlf.fit(df=train, **fit_kwargs)\n",
"no_limit = time.perf_counter() - start\n",
"\n",
"start = time.perf_counter()\n",
"auto_mlf.fit(df=train, input_size=50, **fit_kwargs)\n",
"with_limit = time.perf_counter() - start\n",
"\n",
"assert with_limit < no_limit"
]
}
],
"metadata": {
Expand Down
16 changes: 14 additions & 2 deletions nbs/optimization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
" n_windows: int,\n",
" h: int,\n",
" step_size: Optional[int] = None,\n",
" input_size: Optional[int] = None,\n",
" refit: Union[bool, int] = False,\n",
" id_col: str = 'unique_id',\n",
" time_col: str = 'ds',\n",
Expand All @@ -107,6 +108,8 @@
" Forecast horizon.\n",
" step_size : int, optional (default=None)\n",
" Step size between each cross validation window. If None it will be equal to `h`.\n",
" input_size : int, optional (default=None)\n",
" Maximum training samples per serie in each window. If None, will use an expanding window.\n",
" refit : bool or int (default=False)\n",
" Retrain model for each cross validation window.\n",
" If False, the models are trained at the beginning and then used to predict each window.\n",
Expand Down Expand Up @@ -136,6 +139,7 @@
" time_col=time_col,\n",
" freq=freq,\n",
" step_size=step_size,\n",
" input_size=input_size,\n",
" )\n",
" model_copy = clone(model)\n",
" model_params = config['model_params']\n",
Expand Down Expand Up @@ -232,11 +236,13 @@
"> ial._trial.Trial],Dict[str,Any]], loss:Callable,\n",
"> model:sklearn.base.BaseEstimator,\n",
"> freq:Union[int,str], n_windows:int, h:int,\n",
"> step_size:Optional[int]=None,\n",
"> input_size:Optional[int]=None,\n",
"> refit:Union[bool,int]=False,\n",
"> id_col:str='unique_id', time_col:str='ds',\n",
"> target_col:str='y')\n",
"\n",
"optuna objective function for the MLForecast class\n",
"*optuna objective function for the MLForecast class*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
Expand All @@ -247,6 +253,8 @@
"| freq | Union | | pandas' or polars' offset alias or integer denoting the frequency of the series. |\n",
"| n_windows | int | | Number of windows to evaluate. |\n",
"| h | int | | Forecast horizon. |\n",
"| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n",
"| input_size | Optional | None | Maximum training samples per serie in each window. If None, will use an expanding window. |\n",
"| refit | Union | False | Retrain model for each cross validation window.<br>If False, the models are trained at the beginning and then used to predict each window.<br>If positive int, the models are retrained every `refit` windows. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n",
Expand All @@ -266,11 +274,13 @@
"> ial._trial.Trial],Dict[str,Any]], loss:Callable,\n",
"> model:sklearn.base.BaseEstimator,\n",
"> freq:Union[int,str], n_windows:int, h:int,\n",
"> step_size:Optional[int]=None,\n",
"> input_size:Optional[int]=None,\n",
"> refit:Union[bool,int]=False,\n",
"> id_col:str='unique_id', time_col:str='ds',\n",
"> target_col:str='y')\n",
"\n",
"optuna objective function for the MLForecast class\n",
"*optuna objective function for the MLForecast class*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
Expand All @@ -281,6 +291,8 @@
"| freq | Union | | pandas' or polars' offset alias or integer denoting the frequency of the series. |\n",
"| n_windows | int | | Number of windows to evaluate. |\n",
"| h | int | | Forecast horizon. |\n",
"| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n",
"| input_size | Optional | None | Maximum training samples per serie in each window. If None, will use an expanding window. |\n",
"| refit | Union | False | Retrain model for each cross validation window.<br>If False, the models are trained at the beginning and then used to predict each window.<br>If positive int, the models are retrained every `refit` windows. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n",
Expand Down

0 comments on commit 151f9e8

Please sign in to comment.