Skip to content

Commit

Permalink
simpler solution, add test
Browse files Browse the repository at this point in the history
  • Loading branch information
wgifford committed Jul 11, 2024
1 parent 5e0ff94 commit c67ffcd
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 28 deletions.
8 changes: 8 additions & 0 deletions tests/toolkit/test_time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,14 @@ def test_get_datasets_with_frequency_token(ts_data):
assert train[0]["freq_token"] == DEFAULT_FREQUENCY_MAPPING["d"]


def test_get_frequency_token():
    """Check that equivalent spellings of an hourly frequency map to the "h" token.

    The frequency is given as a multiple ("1h"), as a bare alias ("h"), and as
    a timedelta string ("0 days 01:00:00"); each must resolve to the same
    entry of the default frequency mapping.
    """
    tsp = TimeSeriesPreprocessor(timestamp_column="date")
    hourly_token = DEFAULT_FREQUENCY_MAPPING["h"]

    for freq in ("1h", "h", "0 days 01:00:00"):
        assert tsp.get_frequency_token(freq) == hourly_token


def test_id_columns_and_scaling_id_columns(ts_data_runs):
df = ts_data_runs

Expand Down
45 changes: 17 additions & 28 deletions tsfm_public/toolkit/time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pandas as pd
from datasets import Dataset
from deprecated import deprecated
from pandas.tseries.frequencies import to_offset
from sklearn.preprocessing import MinMaxScaler as MinMaxScaler_
from sklearn.preprocessing import OrdinalEncoder as OrdinalEncoder_
from sklearn.preprocessing import StandardScaler as StandardScaler_
Expand Down Expand Up @@ -197,9 +198,6 @@ def __init__(
self.target_scaler_dict = {}
self.categorical_encoder = None
self.frequency_mapping = frequency_mapping

self._timedelta_map = self._get_timedelta_map()

self.freq = freq

kwargs["processor_class"] = self.__class__.__name__
Expand Down Expand Up @@ -231,22 +229,6 @@ def _validate_columns(self):
"A column name should appear only once in `target_columns`, `observable_colums`, `control_columnts`, `conditional_columns`, `categorical_columns`, and `static_columns`."
)

def _get_timedelta_map(
self,
) -> Dict[str, str]:
"""Get a mapping that relates timedeltas to frequencies in the frequency map.
Returns:
Dict[str, str]: Dictionary of mappings from timedelta strings to frequency token names.
"""
td_map = {}
for k, v in self.frequency_mapping.items():
if k == "oov":
continue
td_str = str(pd._libs.tslibs.timedeltas.Timedelta(k if k[0].isdigit() else f"1{k}"))
td_map[td_str] = k
return td_map

def to_dict(self) -> Dict[str, Any]:
"""
Serializes this instance to a Python dictionary.
Expand Down Expand Up @@ -464,16 +446,23 @@ def _train_categorical_encoder(self, df: pd.DataFrame):

def get_frequency_token(self, token_name: str):
    """Look up the frequency token for a frequency given as a string.

    The lookup is attempted in this order:
      1. `token_name` is used directly as a key of `self.frequency_mapping`.
      2. `token_name` is parsed as a pandas frequency string (e.g. "1h") and
         the normalized frequency string of the resulting offset is looked up.
      3. `token_name` is parsed as a timedelta string (e.g. "0 days 01:00:00"),
         converted to an offset, and its frequency string is looked up.

    If none of these yields a token, a warning is issued and the "oov"
    entry of `self.frequency_mapping` is returned.

    Args:
        token_name (str): Frequency name, frequency string, or timedelta string.

    Returns:
        The token stored in `self.frequency_mapping` for the frequency, or the
        "oov" token when the frequency cannot be resolved.
    """
    token = self.frequency_mapping.get(token_name, None)
    if token is not None:
        return token

    # try to map as a frequency string
    try:
        token_name_offs = to_offset(token_name).freqstr
    except ValueError:
        # lastly try to map the timedelta to a frequency string
        try:
            token_name_td = pd.Timedelta(token_name)
            token_name_offs = to_offset(token_name_td).freqstr
        except ValueError:
            # neither a frequency string nor a timedelta; fall through to "oov"
            token_name_offs = None

    if token_name_offs is not None:
        token = self.frequency_mapping.get(token_name_offs, None)
        if token is not None:
            return token

    warn(f"Frequency token {token_name} was not found in the frequncy token mapping.")
    return self.frequency_mapping["oov"]

Expand Down

0 comments on commit c67ffcd

Please sign in to comment.