diff --git a/tests/toolkit/conftest.py b/tests/toolkit/conftest.py
index 395eef79..aeb7c883 100644
--- a/tests/toolkit/conftest.py
+++ b/tests/toolkit/conftest.py
@@ -16,8 +16,7 @@ def ts_data():
{
"id": nreps(["A", "B", "C"], 50),
"id2": nreps(["XX", "YY", "ZZ"], 50),
- "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)]
- * 3,
+ "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)] * 3,
"value1": range(150),
"value2": np.arange(150) / 3 + 10,
}
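
The fixture above builds a long-format panel of three series over the same 50-day range. A minimal standalone sketch of the same construction, assuming `nreps(values, n)` repeats each value `n` times consecutively (a local stand-in is defined here instead of importing the test helper):

```python
from datetime import datetime, timedelta

import numpy as np
import pandas as pd


def nreps(values, n):
    # local stand-in, assumed to match the test helper's behavior
    return [v for v in values for _ in range(n)]


df = pd.DataFrame(
    {
        "id": nreps(["A", "B", "C"], 50),
        "id2": nreps(["XX", "YY", "ZZ"], 50),
        # the same 50-day range repeats once per id, hence the trailing `* 3`
        "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)] * 3,
        "value1": range(150),
        "value2": np.arange(150) / 3 + 10,
    }
)
assert df.shape == (150, 5)
```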
diff --git a/tests/toolkit/test_dataset.py b/tests/toolkit/test_dataset.py
index 27cc7bb9..388879c7 100644
--- a/tests/toolkit/test_dataset.py
+++ b/tests/toolkit/test_dataset.py
@@ -40,8 +40,7 @@ def ts_data_with_categorical():
return pd.DataFrame(
{
"id": nreps(["A", "B", "C"], 50),
- "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)]
- * 3,
+ "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)] * 3,
"value1": range(150),
"value2": np.arange(150) / 3 + 10,
"value3": np.arange(150) / 50 - 6,
@@ -74,9 +73,7 @@ def test_ts_padding(ts_data):
# test date handled
# integer
- assert df_padded.iloc[0]["time_int"] == df.iloc[0]["time_int"] - (
- context_length - df.shape[0]
- )
+ assert df_padded.iloc[0]["time_int"] == df.iloc[0]["time_int"] - (context_length - df.shape[0])
# date
df_padded = ts_padding(
@@ -86,9 +83,9 @@ def test_ts_padding(ts_data):
context_length=context_length,
)
- assert df_padded.iloc[0]["time_date"] == df.iloc[0]["time_date"] - (
- context_length - df.shape[0]
- ) * timedelta(days=1)
+ assert df_padded.iloc[0]["time_date"] == df.iloc[0]["time_date"] - (context_length - df.shape[0]) * timedelta(
+ days=1
+ )
def test_pretrain_df_dataset(ts_data):
@@ -106,7 +103,6 @@ def test_pretrain_df_dataset(ts_data):
def test_forecasting_df_dataset(ts_data_with_categorical):
-
prediction_length = 2
static_categorical_columns = ["color", "material"]
target_columns = ["value1"]
@@ -141,9 +137,7 @@ def test_forecasting_df_dataset(ts_data_with_categorical):
# check that we produce outputs for static categorical
assert "static_categorical_values" in ds[0]
- assert ds[0]["static_categorical_values"].shape == (
- len(static_categorical_columns),
- )
+ assert ds[0]["static_categorical_values"].shape == (len(static_categorical_columns),)
# check that frequency token is present
assert "freq_token" in ds[0]
diff --git a/tests/toolkit/test_time_series_forecasting_pipeline.py b/tests/toolkit/test_time_series_forecasting_pipeline.py
index 5709006c..7b87dec5 100644
--- a/tests/toolkit/test_time_series_forecasting_pipeline.py
+++ b/tests/toolkit/test_time_series_forecasting_pipeline.py
@@ -29,9 +29,7 @@ def test_forecasting_pipeline_forecasts():
freq="1h",
)
- dataset_path = (
- "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
- )
+ dataset_path = "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
test_end_index = 12 * 30 * 24 + 8 * 30 * 24
test_start_index = test_end_index - context_length
@@ -67,10 +65,7 @@ def test_forecasting_pipeline_forecasts():
assert forecasts_no_future.shape == (1, 2 * len(target_columns) + 1)
# check forecasts match
- assert (
- forecasts_no_future.iloc[0]["OT_prediction"]
- == forecasts.iloc[0]["OT_prediction"]
- )
+ assert forecasts_no_future.iloc[0]["OT_prediction"] == forecasts.iloc[0]["OT_prediction"]
# test that forecasts are properly exploded
forecast_pipeline = TimeSeriesForecastingPipeline(
diff --git a/tests/toolkit/test_time_series_preprocessor.py b/tests/toolkit/test_time_series_preprocessor.py
index d905bac9..42069611 100644
--- a/tests/toolkit/test_time_series_preprocessor.py
+++ b/tests/toolkit/test_time_series_preprocessor.py
@@ -26,9 +26,9 @@ def test_standard_scaler(sample_data):
# check shape preserved
result = scaler.fit_transform(sample_data[columns])
assert result.shape == sample_data[columns].shape
- expected = (
- sample_data[columns].values - np.mean(sample_data[columns].values, axis=0)
- ) / np.std(sample_data[columns].values, axis=0)
+ expected = (sample_data[columns].values - np.mean(sample_data[columns].values, axis=0)) / np.std(
+ sample_data[columns].values, axis=0
+ )
np.testing.assert_allclose(result, expected)
# check serialization
@@ -69,7 +69,6 @@ def test_ordinal_encoder(sample_data):
def test_time_series_preprocessor_encodes(sample_data):
-
static_categorical_columns = ["cat", "cat2"]
tsp = TimeSeriesPreprocessor(
@@ -85,11 +84,8 @@ def test_time_series_preprocessor_encodes(sample_data):
def test_augment_time_series(ts_data):
-
periods = 5
- a = extend_time_series(
- ts_data, timestamp_column="timestamp", grouping_columns=["id"], periods=periods
- )
+ a = extend_time_series(ts_data, timestamp_column="timestamp", grouping_columns=["id"], periods=periods)
# check that length increases by periods for each id
assert a.shape[0] == ts_data.shape[0] + 3 * periods
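
The scaler test reflowed above relies on the z-score identity; a self-contained version of the same check (column names and values illustrative), noting that scikit-learn's `StandardScaler` uses the population standard deviation (`ddof=0`), which matches `np.std`'s default:

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

sample_data = pd.DataFrame({"val": [1.0, 2.0, 3.0, 4.0], "val2": [10.0, 12.0, 14.0, 16.0]})
columns = ["val", "val2"]

scaler = StandardScaler()
result = scaler.fit_transform(sample_data[columns])

expected = (sample_data[columns].values - np.mean(sample_data[columns].values, axis=0)) / np.std(
    sample_data[columns].values, axis=0
)
np.testing.assert_allclose(result, expected)
```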
diff --git a/tsfm_public/toolkit/dataset.py b/tsfm_public/toolkit/dataset.py
index dea69b77..85098a88 100644
--- a/tsfm_public/toolkit/dataset.py
+++ b/tsfm_public/toolkit/dataset.py
@@ -50,22 +50,16 @@ def __init__(
y_cols = [y_cols]
if len(x_cols) > 0:
- assert is_cols_in_df(
- data_df, x_cols
- ), f"one or more {x_cols} is not in the list of data_df columns"
+ assert is_cols_in_df(data_df, x_cols), f"one or more {x_cols} is not in the list of data_df columns"
if len(y_cols) > 0:
- assert is_cols_in_df(
- data_df, y_cols
- ), f"one or more {y_cols} is not in the list of data_df columns"
+ assert is_cols_in_df(data_df, y_cols), f"one or more {y_cols} is not in the list of data_df columns"
if timestamp_column:
assert timestamp_column in list(
data_df.columns
), f"{timestamp_column} is not in the list of data_df columns"
- assert (
- timestamp_column not in x_cols
- ), f"{timestamp_column} should not be in the list of x_cols"
+ assert timestamp_column not in x_cols, f"{timestamp_column} should not be in the list of x_cols"
self.data_df = data_df
self.datetime_col = timestamp_column
@@ -162,9 +156,7 @@ def __init__(
**kwargs,
):
if len(id_columns) > 0:
- assert is_cols_in_df(
- data_df, id_columns
- ), f"{id_columns} is not in the data_df columns"
+ assert is_cols_in_df(data_df, id_columns), f"{id_columns} is not in the data_df columns"
self.timestamp_column = timestamp_column
self.id_columns = id_columns
@@ -424,9 +416,7 @@ def __init__(
)
# masking for conditional values which are not observed during future period
- self.y_mask_conditional = np.array(
- [(c in conditional_columns) for c in y_cols]
- )
+ self.y_mask_conditional = np.array([(c in conditional_columns) for c in y_cols])
# create a mask of x which masks targets
self.x_mask_targets = np.array([(c in target_columns) for c in x_cols])
@@ -451,10 +441,7 @@ def __getitem__(self, time_id):
# seq_y: batch_size x pred_len x num_x_cols
seq_y = self.y[
- time_id
- + self.context_length : time_id
- + self.context_length
- + self.prediction_length
+ time_id + self.context_length : time_id + self.context_length + self.prediction_length
].values
seq_y[:, self.y_mask_conditional] = 0
@@ -473,9 +460,7 @@ def __getitem__(self, time_id):
ret["freq_token"] = torch.tensor(self.frequency_token, dtype=torch.int)
if self.static_categorical_columns:
- categorical_values = self.data_df[
- self.static_categorical_columns
- ].values[0, :]
+ categorical_values = self.data_df[self.static_categorical_columns].values[0, :]
ret["static_categorical_values"] = np_to_torch(categorical_values)
return ret
@@ -543,7 +528,6 @@ def __init__(
input_columns: List[str] = [],
static_categorical_columns: List[str] = [],
):
-
self.target_columns = target_columns
self.input_columns = input_columns
self.static_categorical_columns = static_categorical_columns
@@ -566,9 +550,7 @@ def __init__(
def __getitem__(self, time_id):
# seq_x: batch_size x seq_len x num_x_cols
seq_x = self.X[time_id : time_id + self.context_length].values
- seq_y = self.y[
- time_id + self.context_length - 1 : time_id + self.context_length
- ].values.ravel()
+ seq_y = self.y[time_id + self.context_length - 1 : time_id + self.context_length].values.ravel()
# return _torch(seq_x, seq_y)
ret = {
@@ -582,9 +564,7 @@ def __getitem__(self, time_id):
ret["id"] = self.group_id
if self.static_categorical_columns:
- categorical_values = self.data_df[
- self.static_categorical_columns
- ].values[0, :]
+ categorical_values = self.data_df[self.static_categorical_columns].values[0, :]
ret["static_categorical_values"] = np_to_torch(categorical_values)
return ret
@@ -661,21 +641,15 @@ def ts_padding(
pad_df[c] = pad_df[c].astype(df.dtypes[c], copy=False)
if timestamp_column:
- if (df[timestamp_column].dtype.type == np.datetime64) or (
- df[timestamp_column].dtype == int
- ):
+ if (df[timestamp_column].dtype.type == np.datetime64) or (df[timestamp_column].dtype == int):
last_timestamp = df.iloc[0][timestamp_column]
period = df.iloc[1][timestamp_column] - df.iloc[0][timestamp_column]
- prepended_timestamps = [
- last_timestamp + offset * period for offset in range(-fill_length, 0)
- ]
+ prepended_timestamps = [last_timestamp + offset * period for offset in range(-fill_length, 0)]
pad_df[timestamp_column] = prepended_timestamps
else:
pad_df[timestamp_column] = None
# Ensure same type
- pad_df[timestamp_column] = pad_df[timestamp_column].astype(
- df[timestamp_column].dtype
- )
+ pad_df[timestamp_column] = pad_df[timestamp_column].astype(df[timestamp_column].dtype)
if id_columns:
id_values = df.iloc[0][id_columns].to_list()
@@ -716,6 +690,4 @@ def is_cols_in_df(df: pd.DataFrame, cols: List[str]) -> bool:
d6 = PretrainDFDataset(data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2)
print(f"d6: {d6}")
- d7 = ForecastDFDataset(
- data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2, pred_len=2
- )
+ d7 = ForecastDFDataset(data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2, pred_len=2)
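
The `__getitem__` reflows above all implement the same sliding-window split: `context_length` observed steps followed by `prediction_length` future steps. A minimal NumPy sketch with illustrative lengths:

```python
import numpy as np

context_length, prediction_length = 4, 2
series = np.arange(10.0)  # stand-in for one group's target column

time_id = 0
seq_x = series[time_id : time_id + context_length]
seq_y = series[time_id + context_length : time_id + context_length + prediction_length]

assert seq_x.tolist() == [0.0, 1.0, 2.0, 3.0]
assert seq_y.tolist() == [4.0, 5.0]
```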
diff --git a/tsfm_public/toolkit/time_series_forecasting_pipeline.py b/tsfm_public/toolkit/time_series_forecasting_pipeline.py
index 8195c3b2..7c010f88 100644
--- a/tsfm_public/toolkit/time_series_forecasting_pipeline.py
+++ b/tsfm_public/toolkit/time_series_forecasting_pipeline.py
@@ -32,9 +32,7 @@
@add_end_docstrings(
- build_pipeline_init_args(
- has_tokenizer=False, has_feature_extractor=True, has_image_processor=False
- )
+ build_pipeline_init_args(has_tokenizer=False, has_feature_extractor=True, has_image_processor=False)
)
class TimeSeriesForecastingPipeline(Pipeline):
"""Hugging Face Pipeline for Time Series Forecasting"""
@@ -64,9 +62,7 @@ def _sanitize_parameters(self, **kwargs):
"""
context_length = kwargs.get("context_length", self.model.config.context_length)
- prediction_length = kwargs.get(
- "prediction_length", self.model.config.prediction_length
- )
+ prediction_length = kwargs.get("prediction_length", self.model.config.prediction_length)
preprocess_kwargs = {
"prediction_length": prediction_length,
@@ -174,9 +170,7 @@ def __call__(
return super().__call__(time_series, **kwargs)
- def preprocess(
- self, time_series, **kwargs
- ) -> Dict[str, Union[GenericTensor, List[Any]]]:
+ def preprocess(self, time_series, **kwargs) -> Dict[str, Union[GenericTensor, List[Any]]]:
"""Preprocess step
Load the data, if not already loaded, and then generate a pytorch dataset.
"""
@@ -204,16 +198,12 @@ def preprocess(
# do we need to check the timestamp column?
pass
else:
- raise ValueError(
- f"`future_time_series` of type {type(future_time_series)} is not supported."
- )
+ raise ValueError(f"`future_time_series` of type {type(future_time_series)} is not supported.")
# stack the time series
for c in future_time_series.columns:
if c not in time_series.columns:
- raise ValueError(
- f"Future time series input contains an unknown column {c}."
- )
+ raise ValueError(f"Future time series input contains an unknown column {c}.")
time_series = pd.concat((time_series, future_time_series), axis=0)
else:
@@ -274,11 +264,7 @@ def _forward(self, model_inputs, **kwargs):
# copy the other inputs
copy_inputs = True
- for k in [
- akey
- for akey in model_inputs.keys()
- if (akey not in model_input_keys) or copy_inputs
- ]:
+ for k in [akey for akey in model_inputs.keys() if (akey not in model_input_keys) or copy_inputs]:
model_outputs[k] = model_inputs[k]
return model_outputs
@@ -290,20 +276,14 @@ def postprocess(self, input, **kwargs):
"""
out = {}
- model_output_key = (
- "prediction_outputs"
- if "prediction_outputs" in input.keys()
- else "prediction_logits"
- )
+ model_output_key = "prediction_outputs" if "prediction_outputs" in input.keys() else "prediction_logits"
# name the predictions of target columns
# outputs should only have size equal to target columns
prediction_columns = []
for i, c in enumerate(kwargs["target_columns"]):
prediction_columns.append(f"{c}_prediction")
- out[prediction_columns[-1]] = (
- input[model_output_key][:, :, i].numpy().tolist()
- )
+ out[prediction_columns[-1]] = input[model_output_key][:, :, i].numpy().tolist()
# provide the ground truth values for the targets
# when future is unknown, we will have augmented the provided dataframe with NaN values to cover the future
for i, c in enumerate(kwargs["target_columns"]):
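
The postprocess loop reflowed above names one output column per target; a runnable sketch of just that naming step (target names and tensor contents illustrative):

```python
import torch

target_columns = ["HUFL", "OT"]
prediction_outputs = torch.randn(1, 96, len(target_columns))  # batch x horizon x targets

out = {}
for i, c in enumerate(target_columns):
    # each target column c yields a "<c>_prediction" key, as in postprocess()
    out[f"{c}_prediction"] = prediction_outputs[:, :, i].numpy().tolist()

assert set(out) == {"HUFL_prediction", "OT_prediction"}
```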
diff --git a/tsfm_public/toolkit/time_series_preprocessor.py b/tsfm_public/toolkit/time_series_preprocessor.py
index 9739b623..e583e588 100644
--- a/tsfm_public/toolkit/time_series_preprocessor.py
+++ b/tsfm_public/toolkit/time_series_preprocessor.py
@@ -50,9 +50,7 @@ def to_json(self) -> str:
return json.dumps(self.to_dict())
@classmethod
- def from_dict(
- cls, feature_extractor_dict: Dict[str, Any], **kwargs
- ) -> "SKLearnFeatureExtractionBase":
+ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "SKLearnFeatureExtractionBase":
""" """
t = cls()
@@ -121,9 +119,7 @@ def __init__(
# note base class __init__ methods sets all arguments as attributes
if not isinstance(id_columns, list):
- raise ValueError(
- f"Invalid argument provided for `id_columns`: {id_columns}"
- )
+ raise ValueError(f"Invalid argument provided for `id_columns`: {id_columns}")
self.id_columns = id_columns
self.timestamp_column = timestamp_column
@@ -216,10 +212,7 @@ def recursive_check_ndarray(dictionary):
elif isinstance(value, np.int64):
dictionary[key] = int(value)
elif isinstance(value, list):
- dictionary[key] = [
- vv.tolist() if isinstance(vv, np.ndarray) else vv
- for vv in value
- ]
+ dictionary[key] = [vv.tolist() if isinstance(vv, np.ndarray) else vv for vv in value]
elif isinstance(value, dict):
dictionary[key] = recursive_check_ndarray(value)
return dictionary
@@ -235,9 +228,7 @@ def recursive_check_ndarray(dictionary):
return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"
@classmethod
- def from_dict(
- cls, feature_extractor_dict: Dict[str, Any], **kwargs
- ) -> "PreTrainedFeatureExtractor":
+ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "PreTrainedFeatureExtractor":
"""
Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
parameters.
@@ -355,9 +346,7 @@ def _get_groups(
Generator[Any, pd.DataFrame]: Group name and resulting pandas dataframe for the group.
"""
if self.id_columns:
- group_by_columns = (
- self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
- )
+ group_by_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
else:
group_by_columns = INTERNAL_ID_COLUMN
@@ -418,13 +407,10 @@ def _train_categorical_encoder(self, df: pd.DataFrame):
self.categorical_encoder.fit(df[cols_to_encode])
def get_frequency_token(self, token_name: str):
-
token = self.frequency_mapping.get(token_name, None)
if token is None:
- warn(
- f"Frequency token {token_name} was not found in the frequncy token mapping."
- )
+ warn(f"Frequency token {token_name} was not found in the frequncy token mapping.")
token = self.frequency_mapping["oov"]
return token
@@ -457,11 +443,7 @@ def exogenous_channel_indices(self) -> List[int]:
@property
def prediction_channel_indices(self) -> List[int]:
- return [
- i
- for i, c in enumerate(self._get_real_valued_dynamic_channels())
- if c in self.target_columns
- ]
+ return [i for i, c in enumerate(self._get_real_valued_dynamic_channels()) if c in self.target_columns]
def _check_dataset(self, dataset: Union[Dataset, pd.DataFrame]):
"""Basic checks for input dataset.
@@ -485,10 +467,7 @@ def _estimate_frequency(self, df: pd.DataFrame):
df_subset = df
# to do: make more robust
- self.freq = (
- df_subset[self.timestamp_column].iloc[-1]
- - df_subset[self.timestamp_column].iloc[-2]
- )
+ self.freq = df_subset[self.timestamp_column].iloc[-1] - df_subset[self.timestamp_column].iloc[-2]
else:
# no timestamp, assume sequential count?
self.freq = 1
@@ -539,15 +518,11 @@ def inverse_scale_func(grp, id_columns):
name = tuple(grp.iloc[0][id_columns].tolist())
else:
name = grp.iloc[0][id_columns]
- grp[cols_to_scale] = self.target_scaler_dict[name].inverse_transform(
- grp[cols_to_scale]
- )
+ grp[cols_to_scale] = self.target_scaler_dict[name].inverse_transform(grp[cols_to_scale])
return grp
if self.id_columns:
- id_columns = (
- self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
- )
+ id_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
else:
id_columns = INTERNAL_ID_COLUMN
@@ -586,20 +561,14 @@ def scale_func(grp, id_columns):
name = tuple(grp.iloc[0][id_columns].tolist())
else:
name = grp.iloc[0][id_columns]
- grp[self.target_columns] = self.target_scaler_dict[name].transform(
- grp[self.target_columns]
- )
+ grp[self.target_columns] = self.target_scaler_dict[name].transform(grp[self.target_columns])
if other_cols_to_scale:
- grp[other_cols_to_scale] = self.scaler_dict[name].transform(
- grp[other_cols_to_scale]
- )
+ grp[other_cols_to_scale] = self.scaler_dict[name].transform(grp[other_cols_to_scale])
return grp
if self.id_columns:
- id_columns = (
- self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
- )
+ id_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
else:
id_columns = INTERNAL_ID_COLUMN
@@ -612,9 +581,7 @@ def scale_func(grp, id_columns):
cols_to_encode = self._get_columns_to_encode()
if self.encode_categorical and cols_to_encode:
if not self.categorical_encoder:
- raise RuntimeError(
- "Attempt to encode categorical columns, but the encoder has not been trained yet."
- )
+ raise RuntimeError("Attempt to encode categorical columns, but the encoder has not been trained yet.")
df[cols_to_encode] = self.categorical_encoder.transform(df[cols_to_encode])
return df
@@ -623,17 +590,13 @@ def scale_func(grp, id_columns):
def create_timestamps(
last_timestamp: Union[datetime.datetime, pd.Timestamp],
freq: Optional[Union[int, float, datetime.timedelta, pd.Timedelta, str]] = None,
- time_sequence: Optional[
- Union[List[int], List[float], List[datetime.datetime], List[pd.Timestamp]]
- ] = None,
+ time_sequence: Optional[Union[List[int], List[float], List[datetime.datetime], List[pd.Timestamp]]] = None,
periods: int = 1,
):
"""Simple utility to create a list of timestamps based on start, delta and number of periods"""
if freq is None and time_sequence is None:
- raise ValueError(
- "Neither `freq` nor `time_sequence` provided, cannot determine frequency."
- )
+ raise ValueError("Neither `freq` nor `time_sequence` provided, cannot determine frequency.")
if freq is None:
# to do: make more robust
@@ -674,7 +637,6 @@ def extend_time_series(
"""
def augment_one_series(group: Union[pd.Series, pd.DataFrame]):
-
last_timestamp = group[timestamp_column].iloc[-1]
new_data = pd.DataFrame(
@@ -697,9 +659,7 @@ def augment_one_series(group: Union[pd.Series, pd.DataFrame]):
if grouping_columns == []:
new_time_series = augment_one_series(time_series)
else:
- new_time_series = time_series.groupby(grouping_columns).apply(
- augment_one_series, include_groups=False
- )
+ new_time_series = time_series.groupby(grouping_columns).apply(augment_one_series, include_groups=False)
idx_names = list(new_time_series.index.names)
idx_names[-1] = "__delete"
new_time_series = new_time_series.reset_index(names=idx_names)
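
`extend_time_series`, whose body is reflowed above, is exercised in `test_augment_time_series`; a hedged usage sketch following that test (import path assumed from this file, and the appended rows are presumably NaN-filled placeholders):

```python
import pandas as pd

from tsfm_public.toolkit.time_series_preprocessor import extend_time_series

# two series ("A", "B") of three daily observations each
df = pd.DataFrame(
    {
        "id": ["A"] * 3 + ["B"] * 3,
        "timestamp": list(pd.date_range("2021-01-01", periods=3, freq="D")) * 2,
        "value1": range(6),
    }
)

extended = extend_time_series(df, timestamp_column="timestamp", grouping_columns=["id"], periods=2)
# each group gains `periods` future rows, mirroring the assertion in the test
assert extended.shape[0] == df.shape[0] + 2 * 2
```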
diff --git a/tsfm_public/toolkit/util.py b/tsfm_public/toolkit/util.py
index 1fc72fe9..270b100a 100644
--- a/tsfm_public/toolkit/util.py
+++ b/tsfm_public/toolkit/util.py
@@ -35,9 +35,7 @@ def select_by_timestamp(
"""
if not start_timestamp and not end_timestamp:
- raise ValueError(
- "At least one of start_timestamp or end_timestamp must be specified."
- )
+ raise ValueError("At least one of start_timestamp or end_timestamp must be specified.")
if not start_timestamp:
return df[df[timestamp_column] < end_timestamp]
@@ -45,10 +43,7 @@ def select_by_timestamp(
if not end_timestamp:
return df[df[timestamp_column] >= start_timestamp]
- return df[
- (df[timestamp_column] >= start_timestamp)
- & (df[timestamp_column] < end_timestamp)
- ]
+ return df[(df[timestamp_column] >= start_timestamp) & (df[timestamp_column] < end_timestamp)]
def select_by_index(
@@ -79,18 +74,12 @@ def select_by_index(
raise ValueError("At least one of start_index or end_index must be specified.")
if not id_columns:
- return _split_group_by_index(
- df, start_index=start_index, end_index=end_index
- ).copy()
+ return _split_group_by_index(df, start_index=start_index, end_index=end_index).copy()
groups = df.groupby(_get_groupby_columns(id_columns))
result = []
for name, group in groups:
- result.append(
- _split_group_by_index(
- group, name=name, start_index=start_index, end_index=end_index
- )
- )
+ result.append(_split_group_by_index(group, name=name, start_index=start_index, end_index=end_index))
return pd.concat(result)
@@ -127,9 +116,7 @@ def select_by_relative_fraction(
pd.DataFrame: Subset of the dataframe.
"""
if not start_fraction and not end_fraction:
- raise ValueError(
- "At least one of start_fraction or end_fraction must be specified."
- )
+ raise ValueError("At least one of start_fraction or end_fraction must be specified.")
if start_offset < 0:
raise ValueError("The value of start_offset should ne non-negative.")
@@ -215,9 +202,7 @@ def _split_group_by_fraction(
else:
end_index = None
- return _split_group_by_index(
- group_df=group_df, start_index=start_index, end_index=end_index
- )
+ return _split_group_by_index(group_df=group_df, start_index=start_index, end_index=end_index)
def convert_tsf_to_dataframe(
@@ -247,17 +232,13 @@ def convert_tsf_to_dataframe(
if not line.startswith("@data"):
line_content = line.split(" ")
if line.startswith("@attribute"):
- if (
- len(line_content) != 3
- ): # Attributes have both name and type
+ if len(line_content) != 3: # Attributes have both name and type
raise Exception("Invalid meta-data specification.")
col_names.append(line_content[1])
col_types.append(line_content[2])
else:
- if (
- len(line_content) != 2
- ): # Other meta-data have only values
+ if len(line_content) != 2: # Other meta-data have only values
raise Exception("Invalid meta-data specification.")
if line.startswith("@frequency"):
@@ -265,24 +246,18 @@ def convert_tsf_to_dataframe(
elif line.startswith("@horizon"):
forecast_horizon = int(line_content[1])
elif line.startswith("@missing"):
- contain_missing_values = bool(
- strtobool(line_content[1])
- )
+ contain_missing_values = bool(strtobool(line_content[1]))
elif line.startswith("@equallength"):
contain_equal_length = bool(strtobool(line_content[1]))
else:
if len(col_names) == 0:
- raise Exception(
- "Missing attribute section. Attribute section must come before data."
- )
+ raise Exception("Missing attribute section. Attribute section must come before data.")
found_data_tag = True
elif not line.startswith("#"):
if len(col_names) == 0:
- raise Exception(
- "Missing attribute section. Attribute section must come before data."
- )
+ raise Exception("Missing attribute section. Attribute section must come before data.")
elif not found_data_tag:
raise Exception("Missing @data tag.")
else:
@@ -315,9 +290,7 @@ def convert_tsf_to_dataframe(
else:
numeric_series.append(float(val))
- if numeric_series.count(replace_missing_vals_with) == len(
- numeric_series
- ):
+ if numeric_series.count(replace_missing_vals_with) == len(numeric_series):
raise Exception(
"All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
)
@@ -331,9 +304,7 @@ def convert_tsf_to_dataframe(
elif col_types[i] == "string":
att_val = str(full_info[i])
elif col_types[i] == "date":
- att_val = datetime.strptime(
- full_info[i], "%Y-%m-%d %H-%M-%S"
- )
+ att_val = datetime.strptime(full_info[i], "%Y-%m-%d %H-%M-%S")
else:
raise Exception(
"Invalid attribute type."
diff --git a/tsfm_public/toolkit/visualization.py b/tsfm_public/toolkit/visualization.py
index a5629526..18af9cc8 100644
--- a/tsfm_public/toolkit/visualization.py
+++ b/tsfm_public/toolkit/visualization.py
@@ -83,9 +83,7 @@ def plot_ts_forecasting(
# plot true data
if not HAVE_SEABORN and plot_type == "seaborn":
- raise ValueError(
- "Please install the seaborn package if seaborn plots are needed."
- )
+ raise ValueError("Please install the seaborn package if seaborn plots are needed.")
# if plot_start > len(test_data_updated):
# logging.warning(
@@ -138,9 +136,7 @@ def plot_ts_forecasting(
# index into the predictions so that the end of the prediction coincides with the end of the ground truth
#
- predictions_end = (
- plot_range[-1] - prediction_length - context_length + 1
- ) # - context_length - prediction_length
+ predictions_end = plot_range[-1] - prediction_length - context_length + 1 # - context_length - prediction_length
predictions_start = plot_range[0] - context_length
@@ -154,9 +150,7 @@ def plot_ts_forecasting(
if plot_type == "plotly":
for i in plot_index:
start = forecast_data.iloc[i][timestamp_column]
- timestamps = pd.date_range(
- start, freq=periodicity, periods=prediction_length + 1
- )
+ timestamps = pd.date_range(start, freq=periodicity, periods=prediction_length + 1)
timestamp = timestamps[1:]
forecast_val = forecast_data.iloc[i][forecast_name]
plot_line(
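
The `pd.date_range` call reflowed above generates `prediction_length + 1` stamps and then drops the first, so the forecast stamps start one period after the last observed time. A standalone sketch:

```python
import pandas as pd

start = pd.Timestamp("2021-01-01 00:00")
prediction_length, periodicity = 3, "1h"

timestamps = pd.date_range(start, freq=periodicity, periods=prediction_length + 1)
forecast_stamps = timestamps[1:]  # forecasts begin one period after the context ends

assert list(forecast_stamps) == list(pd.date_range("2021-01-01 01:00", freq="1h", periods=3))
```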
diff --git a/tsfmhfdemos/neurips/app.py b/tsfmhfdemos/neurips/app.py
index d2db7a36..b60b1781 100644
--- a/tsfmhfdemos/neurips/app.py
+++ b/tsfmhfdemos/neurips/app.py
@@ -42,9 +42,7 @@ def tsforecasting_with_fmdls():
)
st.title(GLOBAL_CONFIG["title"])
- st.write(
- "", unsafe_allow_html=True
- )
+ st.write("", unsafe_allow_html=True)
st.write(GLOBAL_CONFIG["intro"])
@@ -103,26 +101,20 @@ def tsforecasting_with_fmdls():
for idx, channel in enumerate(dataset_meta["channel_plots"]):
# col = columns[idx % num_cols]
st.plotly_chart(
- model_util.create_figure(
- **dataset_meta, **model_meta, **approach_meta, channel=channel
- ),
+ model_util.create_figure(**dataset_meta, **model_meta, **approach_meta, channel=channel),
use_container_width=True,
fig_size=(1600, 200),
)
with col2:
st.subheader("Performance")
- df_perf = model_util.get_performance(
- metrics=METRICS, **dataset_meta, **model_meta, **approach_meta
- )
+ df_perf = model_util.get_performance(metrics=METRICS, **dataset_meta, **model_meta, **approach_meta)
df_perf_styled = df_perf.style.set_table_styles(
[
{"selector": "th", "props": "background-color: whitesmoke;"},
]
- ).format(
- precision=3
- ) # .style.hide(axis="index")
+ ).format(precision=3) # .style.hide(axis="index")
st.write(df_perf_styled.to_html(), unsafe_allow_html=True)
st.write("")
@@ -193,9 +185,7 @@ def tsforecasting_with_fmdls():
out = re.sub(r"\\textbf{([^&]*)}", r"\1", table_source)
out = re.sub(r"\\uline{([^&]*)}", r"\1", out)
out = re.sub(r"\s*|\$\\pm\$[^&]*|\\cline{.*}", "", out)
- vals = np.array([r.split("&")[3:] for r in out.split(r"\\")[2:30]]).astype(
- float
- )
+ vals = np.array([r.split("&")[3:] for r in out.split(r"\\")[2:30]]).astype(float)
leaderboard = pd.DataFrame(
index=pd.MultiIndex.from_product(
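
The Styler chain reflowed above is plain pandas; a self-contained sketch of the same header styling and 3-digit rounding (metric values illustrative):

```python
import pandas as pd

df_perf = pd.DataFrame({"MSE": [0.1234567], "MAE": [0.7654321]}, index=["model"])

df_perf_styled = df_perf.style.set_table_styles(
    [{"selector": "th", "props": "background-color: whitesmoke;"}]
).format(precision=3)

html = df_perf_styled.to_html()  # rendered via st.write(html, unsafe_allow_html=True) in the app
```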
diff --git a/tsfmhfdemos/neurips/backends/v1/model_util.py b/tsfmhfdemos/neurips/backends/v1/model_util.py
index 1f650a04..dc482e08 100644
--- a/tsfmhfdemos/neurips/backends/v1/model_util.py
+++ b/tsfmhfdemos/neurips/backends/v1/model_util.py
@@ -149,9 +149,7 @@ def forecast(**kwargs) -> pd.DataFrame:
prep_path = get_preprocessor_path(**kwargs)
model_class = get_model_class(model_path)
- model = model_class.from_pretrained(
- model_path, num_input_channels=len(forecast_columns)
- )
+ model = model_class.from_pretrained(model_path, num_input_channels=len(forecast_columns))
forecast_pipeline = TimeSeriesForecastingPipeline(
model=model,
@@ -194,9 +192,7 @@ def create_figure(**kwargs) -> graph_objs.Figure:
model_class = get_model_class(model_path)
- model = model_class.from_pretrained(
- model_path, num_input_channels=len(forecast_columns)
- )
+ model = model_class.from_pretrained(model_path, num_input_channels=len(forecast_columns))
context_length = model.config.context_length
periodicity = kwargs["periodicity"]
channel = kwargs["channel"]
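
Both reflowed `from_pretrained` calls override `num_input_channels` at load time; in transformers-style models, unrecognized `from_pretrained` kwargs are applied to the model config. A hedged sketch, with the model class and checkpoint path purely illustrative (the demo resolves the actual class via `get_model_class`):

```python
from transformers import PatchTSTForPrediction  # assumed class; the demo picks one dynamically

forecast_columns = ["HUFL", "HULL", "OT"]
model = PatchTSTForPrediction.from_pretrained(
    "ibm/patchtst-etth1-forecasting",  # hypothetical checkpoint path
    num_input_channels=len(forecast_columns),  # config override passed through from_pretrained
)
```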