diff --git a/tests/toolkit/conftest.py b/tests/toolkit/conftest.py
index 395eef79..aeb7c883 100644
--- a/tests/toolkit/conftest.py
+++ b/tests/toolkit/conftest.py
@@ -16,8 +16,7 @@ def ts_data():
         {
             "id": nreps(["A", "B", "C"], 50),
             "id2": nreps(["XX", "YY", "ZZ"], 50),
-            "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)]
-            * 3,
+            "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)] * 3,
             "value1": range(150),
             "value2": np.arange(150) / 3 + 10,
         }
diff --git a/tests/toolkit/test_dataset.py b/tests/toolkit/test_dataset.py
index 27cc7bb9..388879c7 100644
--- a/tests/toolkit/test_dataset.py
+++ b/tests/toolkit/test_dataset.py
@@ -40,8 +40,7 @@ def ts_data_with_categorical():
     return pd.DataFrame(
         {
             "id": nreps(["A", "B", "C"], 50),
-            "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)]
-            * 3,
+            "timestamp": [datetime(2021, 1, 1) + timedelta(days=i) for i in range(50)] * 3,
             "value1": range(150),
             "value2": np.arange(150) / 3 + 10,
             "value3": np.arange(150) / 50 - 6,
@@ -74,9 +73,7 @@ def test_ts_padding(ts_data):
 
     # test date handled
     # integer
-    assert df_padded.iloc[0]["time_int"] == df.iloc[0]["time_int"] - (
-        context_length - df.shape[0]
-    )
+    assert df_padded.iloc[0]["time_int"] == df.iloc[0]["time_int"] - (context_length - df.shape[0])
 
     # date
     df_padded = ts_padding(
@@ -86,9 +83,9 @@ def test_ts_padding(ts_data):
         context_length=context_length,
     )
 
-    assert df_padded.iloc[0]["time_date"] == df.iloc[0]["time_date"] - (
-        context_length - df.shape[0]
-    ) * timedelta(days=1)
+    assert df_padded.iloc[0]["time_date"] == df.iloc[0]["time_date"] - (context_length - df.shape[0]) * timedelta(
+        days=1
+    )
 
 
 def test_pretrain_df_dataset(ts_data):
@@ -106,7 +103,6 @@ def test_pretrain_df_dataset(ts_data):
 
 
 def test_forecasting_df_dataset(ts_data_with_categorical):
-
     prediction_length = 2
     static_categorical_columns = ["color", "material"]
     target_columns = ["value1"]
@@ -141,9 +137,7 @@ def test_forecasting_df_dataset(ts_data_with_categorical):
 
     # check that we produce outputs for static categorical
     assert "static_categorical_values" in ds[0]
-    assert ds[0]["static_categorical_values"].shape == (
-        len(static_categorical_columns),
-    )
+    assert ds[0]["static_categorical_values"].shape == (len(static_categorical_columns),)
 
     # check that frequency token is present
     assert "freq_token" in ds[0]
diff --git a/tests/toolkit/test_time_series_forecasting_pipeline.py b/tests/toolkit/test_time_series_forecasting_pipeline.py
index 5709006c..7b87dec5 100644
--- a/tests/toolkit/test_time_series_forecasting_pipeline.py
+++ b/tests/toolkit/test_time_series_forecasting_pipeline.py
@@ -29,9 +29,7 @@ def test_forecasting_pipeline_forecasts():
         freq="1h",
     )
 
-    dataset_path = (
-        "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
-    )
+    dataset_path = "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
 
     test_end_index = 12 * 30 * 24 + 8 * 30 * 24
     test_start_index = test_end_index - context_length
@@ -67,10 +65,7 @@ def test_forecasting_pipeline_forecasts():
     assert forecasts_no_future.shape == (1, 2 * len(target_columns) + 1)
 
     # check forecasts match
-    assert (
-        forecasts_no_future.iloc[0]["OT_prediction"]
-        == forecasts.iloc[0]["OT_prediction"]
-    )
+    assert forecasts_no_future.iloc[0]["OT_prediction"] == forecasts.iloc[0]["OT_prediction"]
 
     # test that forecasts are properly exploded
     forecast_pipeline = TimeSeriesForecastingPipeline(
diff --git a/tests/toolkit/test_time_series_preprocessor.py b/tests/toolkit/test_time_series_preprocessor.py
index d905bac9..42069611 100644
--- a/tests/toolkit/test_time_series_preprocessor.py
+++ b/tests/toolkit/test_time_series_preprocessor.py
@@ -26,9 +26,9 @@ def test_standard_scaler(sample_data):
     # check shape preserved
     result = scaler.fit_transform(sample_data[columns])
     assert result.shape == sample_data[columns].shape
-    expected = (
-        sample_data[columns].values - np.mean(sample_data[columns].values, axis=0)
-    ) / np.std(sample_data[columns].values, axis=0)
+    expected = (sample_data[columns].values - np.mean(sample_data[columns].values, axis=0)) / np.std(
+        sample_data[columns].values, axis=0
+    )
     np.testing.assert_allclose(result, expected)
 
     # check serialization
@@ -69,7 +69,6 @@ def test_ordinal_encoder(sample_data):
 
 
 def test_time_series_preprocessor_encodes(sample_data):
-
     static_categorical_columns = ["cat", "cat2"]
 
     tsp = TimeSeriesPreprocessor(
@@ -85,11 +84,8 @@ def test_time_series_preprocessor_encodes(sample_data):
 
 
 def test_augment_time_series(ts_data):
-
     periods = 5
-    a = extend_time_series(
-        ts_data, timestamp_column="timestamp", grouping_columns=["id"], periods=periods
-    )
+    a = extend_time_series(ts_data, timestamp_column="timestamp", grouping_columns=["id"], periods=periods)
 
     # check that length increases by periods for each id
     assert a.shape[0] == ts_data.shape[0] + 3 * periods
diff --git a/tsfm_public/toolkit/dataset.py b/tsfm_public/toolkit/dataset.py
index dea69b77..85098a88 100644
--- a/tsfm_public/toolkit/dataset.py
+++ b/tsfm_public/toolkit/dataset.py
@@ -50,22 +50,16 @@ def __init__(
             y_cols = [y_cols]
 
         if len(x_cols) > 0:
-            assert is_cols_in_df(
-                data_df, x_cols
-            ), f"one or more {x_cols} is not in the list of data_df columns"
+            assert is_cols_in_df(data_df, x_cols), f"one or more {x_cols} is not in the list of data_df columns"
 
         if len(y_cols) > 0:
-            assert is_cols_in_df(
-                data_df, y_cols
-            ), f"one or more {y_cols} is not in the list of data_df columns"
+            assert is_cols_in_df(data_df, y_cols), f"one or more {y_cols} is not in the list of data_df columns"
 
         if timestamp_column:
             assert timestamp_column in list(
                 data_df.columns
             ), f"{timestamp_column} is not in the list of data_df columns"
-            assert (
-                timestamp_column not in x_cols
-            ), f"{timestamp_column} should not be in the list of x_cols"
+            assert timestamp_column not in x_cols, f"{timestamp_column} should not be in the list of x_cols"
 
         self.data_df = data_df
         self.datetime_col = timestamp_column
@@ -162,9 +156,7 @@ def __init__(
         **kwargs,
     ):
         if len(id_columns) > 0:
-            assert is_cols_in_df(
-                data_df, id_columns
-            ), f"{id_columns} is not in the data_df columns"
+            assert is_cols_in_df(data_df, id_columns), f"{id_columns} is not in the data_df columns"
 
         self.timestamp_column = timestamp_column
         self.id_columns = id_columns
@@ -424,9 +416,7 @@ def __init__(
         )
 
         # masking for conditional values which are not observed during future period
-        self.y_mask_conditional = np.array(
-            [(c in conditional_columns) for c in y_cols]
-        )
+        self.y_mask_conditional = np.array([(c in conditional_columns) for c in y_cols])
 
         # create a mask of x which masks targets
         self.x_mask_targets = np.array([(c in target_columns) for c in x_cols])
@@ -451,10 +441,7 @@ def __getitem__(self, time_id):
 
         # seq_y: batch_size x pred_len x num_x_cols
         seq_y = self.y[
-            time_id
-            + self.context_length : time_id
-            + self.context_length
-            + self.prediction_length
+            time_id + self.context_length : time_id + self.context_length + self.prediction_length
         ].values
         seq_y[:, self.y_mask_conditional] = 0
 
@@ -473,9 +460,7 @@ def __getitem__(self, time_id):
             ret["freq_token"] = torch.tensor(self.frequency_token, dtype=torch.int)
 
         if self.static_categorical_columns:
-            categorical_values = self.data_df[
-                self.static_categorical_columns
-            ].values[0, :]
+            categorical_values = self.data_df[self.static_categorical_columns].values[0, :]
             ret["static_categorical_values"] = np_to_torch(categorical_values)
 
         return ret
@@ -543,7 +528,6 @@ def __init__(
         input_columns: List[str] = [],
         static_categorical_columns: List[str] = [],
     ):
-
         self.target_columns = target_columns
         self.input_columns = input_columns
         self.static_categorical_columns = static_categorical_columns
@@ -566,9 +550,7 @@ def __init__(
     def __getitem__(self, time_id):
         # seq_x: batch_size x seq_len x num_x_cols
         seq_x = self.X[time_id : time_id + self.context_length].values
-        seq_y = self.y[
-            time_id + self.context_length - 1 : time_id + self.context_length
-        ].values.ravel()
+        seq_y = self.y[time_id + self.context_length - 1 : time_id + self.context_length].values.ravel()
         # return _torch(seq_x, seq_y)
 
         ret = {
@@ -582,9 +564,7 @@ def __getitem__(self, time_id):
             ret["id"] = self.group_id
 
         if self.static_categorical_columns:
-            categorical_values = self.data_df[
-                self.static_categorical_columns
-            ].values[0, :]
+            categorical_values = self.data_df[self.static_categorical_columns].values[0, :]
             ret["static_categorical_values"] = np_to_torch(categorical_values)
 
         return ret
@@ -661,21 +641,15 @@ def ts_padding(
         pad_df[c] = pad_df[c].astype(df.dtypes[c], copy=False)
 
     if timestamp_column:
-        if (df[timestamp_column].dtype.type == np.datetime64) or (
-            df[timestamp_column].dtype == int
-        ):
+        if (df[timestamp_column].dtype.type == np.datetime64) or (df[timestamp_column].dtype == int):
             last_timestamp = df.iloc[0][timestamp_column]
             period = df.iloc[1][timestamp_column] - df.iloc[0][timestamp_column]
-            prepended_timestamps = [
-                last_timestamp + offset * period for offset in range(-fill_length, 0)
-            ]
+            prepended_timestamps = [last_timestamp + offset * period for offset in range(-fill_length, 0)]
             pad_df[timestamp_column] = prepended_timestamps
         else:
             pad_df[timestamp_column] = None
         # Ensure same type
-        pad_df[timestamp_column] = pad_df[timestamp_column].astype(
-            df[timestamp_column].dtype
-        )
+        pad_df[timestamp_column] = pad_df[timestamp_column].astype(df[timestamp_column].dtype)
 
     if id_columns:
         id_values = df.iloc[0][id_columns].to_list()
@@ -716,6 +690,4 @@ def is_cols_in_df(df: pd.DataFrame, cols: List[str]) -> bool:
     d6 = PretrainDFDataset(data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2)
     print(f"d6: {d6}")
 
-    d7 = ForecastDFDataset(
-        data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2, pred_len=2
-    )
+    d7 = ForecastDFDataset(data_df=df, x_cols=["A", "B"], group_ids=["g1"], seq_len=2, pred_len=2)
diff --git a/tsfm_public/toolkit/time_series_forecasting_pipeline.py b/tsfm_public/toolkit/time_series_forecasting_pipeline.py
index 8195c3b2..7c010f88 100644
--- a/tsfm_public/toolkit/time_series_forecasting_pipeline.py
+++ b/tsfm_public/toolkit/time_series_forecasting_pipeline.py
@@ -32,9 +32,7 @@
 
 
 @add_end_docstrings(
-    build_pipeline_init_args(
-        has_tokenizer=False, has_feature_extractor=True, has_image_processor=False
-    )
+    build_pipeline_init_args(has_tokenizer=False, has_feature_extractor=True, has_image_processor=False)
 )
 class TimeSeriesForecastingPipeline(Pipeline):
     """Hugging Face Pipeline for Time Series Forecasting"""
@@ -64,9 +62,7 @@ def _sanitize_parameters(self, **kwargs):
         """
 
         context_length = kwargs.get("context_length", self.model.config.context_length)
-        prediction_length = kwargs.get(
-            "prediction_length", self.model.config.prediction_length
-        )
+        prediction_length = kwargs.get("prediction_length", self.model.config.prediction_length)
 
         preprocess_kwargs = {
             "prediction_length": prediction_length,
@@ -174,9 +170,7 @@ def __call__(
 
         return super().__call__(time_series, **kwargs)
 
-    def preprocess(
-        self, time_series, **kwargs
-    ) -> Dict[str, Union[GenericTensor, List[Any]]]:
+    def preprocess(self, time_series, **kwargs) -> Dict[str, Union[GenericTensor, List[Any]]]:
         """Preprocess step
         Load the data, if not already loaded, and then generate a pytorch dataset.
         """
@@ -204,16 +198,12 @@ def preprocess(
                 # do we need to check the timestamp column?
                 pass
             else:
-                raise ValueError(
-                    f"`future_time_series` of type {type(future_time_series)} is not supported."
-                )
+                raise ValueError(f"`future_time_series` of type {type(future_time_series)} is not supported.")
 
             # stack the time series
             for c in future_time_series.columns:
                 if c not in time_series.columns:
-                    raise ValueError(
-                        f"Future time series input contains an unknown column {c}."
-                    )
+                    raise ValueError(f"Future time series input contains an unknown column {c}.")
 
             time_series = pd.concat((time_series, future_time_series), axis=0)
         else:
@@ -274,11 +264,7 @@ def _forward(self, model_inputs, **kwargs):
 
         # copy the other inputs
         copy_inputs = True
-        for k in [
-            akey
-            for akey in model_inputs.keys()
-            if (akey not in model_input_keys) or copy_inputs
-        ]:
+        for k in [akey for akey in model_inputs.keys() if (akey not in model_input_keys) or copy_inputs]:
             model_outputs[k] = model_inputs[k]
 
         return model_outputs
@@ -290,20 +276,14 @@ def postprocess(self, input, **kwargs):
 
         """
         out = {}
-        model_output_key = (
-            "prediction_outputs"
-            if "prediction_outputs" in input.keys()
-            else "prediction_logits"
-        )
+        model_output_key = "prediction_outputs" if "prediction_outputs" in input.keys() else "prediction_logits"
         # name the predictions of target columns
         # outputs should only have size equal to target columns
         prediction_columns = []
         for i, c in enumerate(kwargs["target_columns"]):
             prediction_columns.append(f"{c}_prediction")
-            out[prediction_columns[-1]] = (
-                input[model_output_key][:, :, i].numpy().tolist()
-            )
+            out[prediction_columns[-1]] = input[model_output_key][:, :, i].numpy().tolist()
 
         # provide the ground truth values for the targets
         # when future is unknown, we will have augmented the provided dataframe with NaN values to cover the future
         for i, c in enumerate(kwargs["target_columns"]):
diff --git a/tsfm_public/toolkit/time_series_preprocessor.py b/tsfm_public/toolkit/time_series_preprocessor.py
index 9739b623..e583e588 100644
--- a/tsfm_public/toolkit/time_series_preprocessor.py
+++ b/tsfm_public/toolkit/time_series_preprocessor.py
@@ -50,9 +50,7 @@ def to_json(self) -> str:
         return json.dumps(self.to_dict())
 
     @classmethod
-    def from_dict(
-        cls, feature_extractor_dict: Dict[str, Any], **kwargs
-    ) -> "SKLearnFeatureExtractionBase":
+    def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "SKLearnFeatureExtractionBase":
         """ """
 
         t = cls()
@@ -121,9 +119,7 @@ def __init__(
         # note base class __init__ methods sets all arguments as attributes
 
         if not isinstance(id_columns, list):
-            raise ValueError(
-                f"Invalid argument provided for `id_columns`: {id_columns}"
-            )
+            raise ValueError(f"Invalid argument provided for `id_columns`: {id_columns}")
 
         self.id_columns = id_columns
         self.timestamp_column = timestamp_column
@@ -216,10 +212,7 @@ def recursive_check_ndarray(dictionary):
                 elif isinstance(value, np.int64):
                     dictionary[key] = int(value)
                 elif isinstance(value, list):
-                    dictionary[key] = [
-                        vv.tolist() if isinstance(vv, np.ndarray) else vv
-                        for vv in value
-                    ]
+                    dictionary[key] = [vv.tolist() if isinstance(vv, np.ndarray) else vv for vv in value]
                 elif isinstance(value, dict):
                     dictionary[key] = recursive_check_ndarray(value)
             return dictionary
@@ -235,9 +228,7 @@ def recursive_check_ndarray(dictionary):
         return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"
 
     @classmethod
-    def from_dict(
-        cls, feature_extractor_dict: Dict[str, Any], **kwargs
-    ) -> "PreTrainedFeatureExtractor":
+    def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "PreTrainedFeatureExtractor":
         """
         Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
         parameters.
@@ -355,9 +346,7 @@ def _get_groups(
             Generator[Any, pd.DataFrame]: Group name and resulting pandas dataframe for the group.
         """
         if self.id_columns:
-            group_by_columns = (
-                self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
-            )
+            group_by_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
         else:
             group_by_columns = INTERNAL_ID_COLUMN
 
@@ -418,13 +407,10 @@ def _train_categorical_encoder(self, df: pd.DataFrame):
         self.categorical_encoder.fit(df[cols_to_encode])
 
     def get_frequency_token(self, token_name: str):
-
         token = self.frequency_mapping.get(token_name, None)
 
         if token is None:
-            warn(
-                f"Frequency token {token_name} was not found in the frequncy token mapping."
-            )
+            warn(f"Frequency token {token_name} was not found in the frequncy token mapping.")
             token = self.frequency_mapping["oov"]
 
         return token
@@ -457,11 +443,7 @@ def exogenous_channel_indices(self) -> List[int]:
 
     @property
    def prediction_channel_indices(self) -> List[int]:
-        return [
-            i
-            for i, c in enumerate(self._get_real_valued_dynamic_channels())
-            if c in self.target_columns
-        ]
+        return [i for i, c in enumerate(self._get_real_valued_dynamic_channels()) if c in self.target_columns]
 
     def _check_dataset(self, dataset: Union[Dataset, pd.DataFrame]):
         """Basic checks for input dataset.
@@ -485,10 +467,7 @@ def _estimate_frequency(self, df: pd.DataFrame):
             df_subset = df
 
             # to do: make more robust
-            self.freq = (
-                df_subset[self.timestamp_column].iloc[-1]
-                - df_subset[self.timestamp_column].iloc[-2]
-            )
+            self.freq = df_subset[self.timestamp_column].iloc[-1] - df_subset[self.timestamp_column].iloc[-2]
         else:
             # no timestamp, assume sequential count?
             self.freq = 1
@@ -539,15 +518,11 @@ def inverse_scale_func(grp, id_columns):
                 name = tuple(grp.iloc[0][id_columns].tolist())
             else:
                 name = grp.iloc[0][id_columns]
-            grp[cols_to_scale] = self.target_scaler_dict[name].inverse_transform(
-                grp[cols_to_scale]
-            )
+            grp[cols_to_scale] = self.target_scaler_dict[name].inverse_transform(grp[cols_to_scale])
             return grp
 
         if self.id_columns:
-            id_columns = (
-                self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
-            )
+            id_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
         else:
             id_columns = INTERNAL_ID_COLUMN
 
@@ -586,20 +561,14 @@ def scale_func(grp, id_columns):
                 name = tuple(grp.iloc[0][id_columns].tolist())
             else:
                 name = grp.iloc[0][id_columns]
-            grp[self.target_columns] = self.target_scaler_dict[name].transform(
-                grp[self.target_columns]
-            )
+            grp[self.target_columns] = self.target_scaler_dict[name].transform(grp[self.target_columns])
 
             if other_cols_to_scale:
-                grp[other_cols_to_scale] = self.scaler_dict[name].transform(
-                    grp[other_cols_to_scale]
-                )
+                grp[other_cols_to_scale] = self.scaler_dict[name].transform(grp[other_cols_to_scale])
             return grp
 
         if self.id_columns:
-            id_columns = (
-                self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
-            )
+            id_columns = self.id_columns if len(self.id_columns) > 1 else self.id_columns[0]
         else:
             id_columns = INTERNAL_ID_COLUMN
 
@@ -612,9 +581,7 @@ def scale_func(grp, id_columns):
         cols_to_encode = self._get_columns_to_encode()
         if self.encode_categorical and cols_to_encode:
             if not self.categorical_encoder:
-                raise RuntimeError(
-                    "Attempt to encode categorical columns, but the encoder has not been trained yet."
-                )
+                raise RuntimeError("Attempt to encode categorical columns, but the encoder has not been trained yet.")
             df[cols_to_encode] = self.categorical_encoder.transform(df[cols_to_encode])
 
         return df
@@ -623,17 +590,13 @@ def scale_func(grp, id_columns):
 def create_timestamps(
     last_timestamp: Union[datetime.datetime, pd.Timestamp],
     freq: Optional[Union[int, float, datetime.timedelta, pd.Timedelta, str]] = None,
-    time_sequence: Optional[
-        Union[List[int], List[float], List[datetime.datetime], List[pd.Timestamp]]
-    ] = None,
+    time_sequence: Optional[Union[List[int], List[float], List[datetime.datetime], List[pd.Timestamp]]] = None,
     periods: int = 1,
 ):
     """Simple utility to create a list of timestamps based on start, delta and number of periods"""
 
     if freq is None and time_sequence is None:
-        raise ValueError(
-            "Neither `freq` nor `time_sequence` provided, cannot determine frequency."
-        )
+        raise ValueError("Neither `freq` nor `time_sequence` provided, cannot determine frequency.")
 
     if freq is None:
         # to do: make more robust
@@ -674,7 +637,6 @@ def extend_time_series(
     """
 
     def augment_one_series(group: Union[pd.Series, pd.DataFrame]):
-
         last_timestamp = group[timestamp_column].iloc[-1]
 
         new_data = pd.DataFrame(
@@ -697,9 +659,7 @@ def augment_one_series(group: Union[pd.Series, pd.DataFrame]):
     if grouping_columns == []:
         new_time_series = augment_one_series(time_series)
     else:
-        new_time_series = time_series.groupby(grouping_columns).apply(
-            augment_one_series, include_groups=False
-        )
+        new_time_series = time_series.groupby(grouping_columns).apply(augment_one_series, include_groups=False)
         idx_names = list(new_time_series.index.names)
         idx_names[-1] = "__delete"
         new_time_series = new_time_series.reset_index(names=idx_names)
diff --git a/tsfm_public/toolkit/util.py b/tsfm_public/toolkit/util.py
index 1fc72fe9..270b100a 100644
--- a/tsfm_public/toolkit/util.py
+++ b/tsfm_public/toolkit/util.py
@@ -35,9 +35,7 @@ def select_by_timestamp(
     """
 
     if not start_timestamp and not end_timestamp:
-        raise ValueError(
-            "At least one of start_timestamp or end_timestamp must be specified."
-        )
+        raise ValueError("At least one of start_timestamp or end_timestamp must be specified.")
 
     if not start_timestamp:
         return df[df[timestamp_column] < end_timestamp]
@@ -45,10 +43,7 @@ def select_by_timestamp(
     if not end_timestamp:
         return df[df[timestamp_column] >= start_timestamp]
 
-    return df[
-        (df[timestamp_column] >= start_timestamp)
-        & (df[timestamp_column] < end_timestamp)
-    ]
+    return df[(df[timestamp_column] >= start_timestamp) & (df[timestamp_column] < end_timestamp)]
 
 
 def select_by_index(
@@ -79,18 +74,12 @@ def select_by_index(
         raise ValueError("At least one of start_index or end_index must be specified.")
 
     if not id_columns:
-        return _split_group_by_index(
-            df, start_index=start_index, end_index=end_index
-        ).copy()
+        return _split_group_by_index(df, start_index=start_index, end_index=end_index).copy()
 
     groups = df.groupby(_get_groupby_columns(id_columns))
     result = []
     for name, group in groups:
-        result.append(
-            _split_group_by_index(
-                group, name=name, start_index=start_index, end_index=end_index
-            )
-        )
+        result.append(_split_group_by_index(group, name=name, start_index=start_index, end_index=end_index))
 
     return pd.concat(result)
 
@@ -127,9 +116,7 @@ def select_by_relative_fraction(
         pd.DataFrame: Subset of the dataframe.
     """
     if not start_fraction and not end_fraction:
-        raise ValueError(
-            "At least one of start_fraction or end_fraction must be specified."
-        )
+        raise ValueError("At least one of start_fraction or end_fraction must be specified.")
 
     if start_offset < 0:
         raise ValueError("The value of start_offset should ne non-negative.")
@@ -215,9 +202,7 @@ def _split_group_by_fraction(
     else:
         end_index = None
 
-    return _split_group_by_index(
-        group_df=group_df, start_index=start_index, end_index=end_index
-    )
+    return _split_group_by_index(group_df=group_df, start_index=start_index, end_index=end_index)
 
 
 def convert_tsf_to_dataframe(
@@ -247,17 +232,13 @@ def convert_tsf_to_dataframe(
                     if not line.startswith("@data"):
                         line_content = line.split(" ")
                         if line.startswith("@attribute"):
-                            if (
-                                len(line_content) != 3
-                            ):  # Attributes have both name and type
+                            if len(line_content) != 3:  # Attributes have both name and type
                                 raise Exception("Invalid meta-data specification.")
 
                             col_names.append(line_content[1])
                             col_types.append(line_content[2])
                         else:
-                            if (
-                                len(line_content) != 2
-                            ):  # Other meta-data have only values
+                            if len(line_content) != 2:  # Other meta-data have only values
                                 raise Exception("Invalid meta-data specification.")
 
                             if line.startswith("@frequency"):
@@ -265,24 +246,18 @@ def convert_tsf_to_dataframe(
                             elif line.startswith("@horizon"):
                                 forecast_horizon = int(line_content[1])
                             elif line.startswith("@missing"):
-                                contain_missing_values = bool(
-                                    strtobool(line_content[1])
-                                )
+                                contain_missing_values = bool(strtobool(line_content[1]))
                             elif line.startswith("@equallength"):
                                 contain_equal_length = bool(strtobool(line_content[1]))
 
                     else:
                         if len(col_names) == 0:
-                            raise Exception(
-                                "Missing attribute section. Attribute section must come before data."
-                            )
+                            raise Exception("Missing attribute section. Attribute section must come before data.")
 
                         found_data_tag = True
                 elif not line.startswith("#"):
                     if len(col_names) == 0:
-                        raise Exception(
-                            "Missing attribute section. Attribute section must come before data."
-                        )
+                        raise Exception("Missing attribute section. Attribute section must come before data.")
                     elif not found_data_tag:
                         raise Exception("Missing @data tag.")
                     else:
@@ -315,9 +290,7 @@ def convert_tsf_to_dataframe(
                         else:
                             numeric_series.append(float(val))
 
-                    if numeric_series.count(replace_missing_vals_with) == len(
-                        numeric_series
-                    ):
+                    if numeric_series.count(replace_missing_vals_with) == len(numeric_series):
                         raise Exception(
                             "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
                         )
@@ -331,9 +304,7 @@ def convert_tsf_to_dataframe(
                         elif col_types[i] == "string":
                             att_val = str(full_info[i])
                         elif col_types[i] == "date":
-                            att_val = datetime.strptime(
-                                full_info[i], "%Y-%m-%d %H-%M-%S"
-                            )
+                            att_val = datetime.strptime(full_info[i], "%Y-%m-%d %H-%M-%S")
                         else:
                             raise Exception(
                                 "Invalid attribute type."
diff --git a/tsfm_public/toolkit/visualization.py b/tsfm_public/toolkit/visualization.py
index a5629526..18af9cc8 100644
--- a/tsfm_public/toolkit/visualization.py
+++ b/tsfm_public/toolkit/visualization.py
@@ -83,9 +83,7 @@ def plot_ts_forecasting(
 
     # plot true data
     if not HAVE_SEABORN and plot_type == "seaborn":
-        raise ValueError(
-            "Please install the seaborn package if seaborn plots are needed."
-        )
+        raise ValueError("Please install the seaborn package if seaborn plots are needed.")
 
     # if plot_start > len(test_data_updated):
     #     logging.warning(
@@ -138,9 +136,7 @@ def plot_ts_forecasting(
 
     # index into the predictions so that the end of the prediction coincides with the end of the ground truth
     #
-    predictions_end = (
-        plot_range[-1] - prediction_length - context_length + 1
-    )  # - context_length - prediction_length
+    predictions_end = plot_range[-1] - prediction_length - context_length + 1  # - context_length - prediction_length
 
     predictions_start = plot_range[0] - context_length
 
@@ -154,9 +150,7 @@ def plot_ts_forecasting(
     if plot_type == "plotly":
         for i in plot_index:
             start = forecast_data.iloc[i][timestamp_column]
-            timestamps = pd.date_range(
-                start, freq=periodicity, periods=prediction_length + 1
-            )
+            timestamps = pd.date_range(start, freq=periodicity, periods=prediction_length + 1)
             timestamp = timestamps[1:]
             forecast_val = forecast_data.iloc[i][forecast_name]
             plot_line(
diff --git a/tsfmhfdemos/neurips/app.py b/tsfmhfdemos/neurips/app.py
index d2db7a36..b60b1781 100644
--- a/tsfmhfdemos/neurips/app.py
+++ b/tsfmhfdemos/neurips/app.py
@@ -42,9 +42,7 @@ def tsforecasting_with_fmdls():
     )
 
     st.title(GLOBAL_CONFIG["title"])
-    st.write(
-        "", unsafe_allow_html=True
-    )
+    st.write("", unsafe_allow_html=True)
 
     st.write(GLOBAL_CONFIG["intro"])
 
@@ -103,26 +101,20 @@ def tsforecasting_with_fmdls():
         for idx, channel in enumerate(dataset_meta["channel_plots"]):
             # col = columns[idx % num_cols]
             st.plotly_chart(
-                model_util.create_figure(
-                    **dataset_meta, **model_meta, **approach_meta, channel=channel
-                ),
+                model_util.create_figure(**dataset_meta, **model_meta, **approach_meta, channel=channel),
                 use_container_width=True,
                 fig_size=(1600, 200),
             )
 
     with col2:
         st.subheader("Performance")
-        df_perf = model_util.get_performance(
-            metrics=METRICS, **dataset_meta, **model_meta, **approach_meta
-        )
+        df_perf = model_util.get_performance(metrics=METRICS, **dataset_meta, **model_meta, **approach_meta)
         df_perf_styled = df_perf.style.set_table_styles(
             [
                 {"selector": "th", "props": "background-color: whitesmoke;"},
             ]
-        ).format(
-            precision=3
-        )  # .style.hide(axis="index")
+        ).format(precision=3)  # .style.hide(axis="index")
 
         st.write(df_perf_styled.to_html(), unsafe_allow_html=True)
 
         st.write("")
@@ -193,9 +185,7 @@ def tsforecasting_with_fmdls():
     out = re.sub(r"\\textbf{([^&]*)}", r"\1", table_source)
     out = re.sub(r"\\uline{([^&]*)}", r"\1", out)
     out = re.sub(r"\s*|\$\\pm\$[^&]*|\\cline{.*}", "", out)
-    vals = np.array([r.split("&")[3:] for r in out.split(r"\\")[2:30]]).astype(
-        float
-    )
+    vals = np.array([r.split("&")[3:] for r in out.split(r"\\")[2:30]]).astype(float)
 
     leaderboard = pd.DataFrame(
         index=pd.MultiIndex.from_product(
diff --git a/tsfmhfdemos/neurips/backends/v1/model_util.py b/tsfmhfdemos/neurips/backends/v1/model_util.py
index 1f650a04..dc482e08 100644
--- a/tsfmhfdemos/neurips/backends/v1/model_util.py
+++ b/tsfmhfdemos/neurips/backends/v1/model_util.py
@@ -149,9 +149,7 @@ def forecast(**kwargs) -> pd.DataFrame:
     prep_path = get_preprocessor_path(**kwargs)
 
     model_class = get_model_class(model_path)
-    model = model_class.from_pretrained(
-        model_path, num_input_channels=len(forecast_columns)
-    )
+    model = model_class.from_pretrained(model_path, num_input_channels=len(forecast_columns))
 
     forecast_pipeline = TimeSeriesForecastingPipeline(
         model=model,
@@ -194,9 +192,7 @@ def create_figure(**kwargs) -> graph_objs.Figure:
 
     model_class = get_model_class(model_path)
 
-    model = model_class.from_pretrained(
-        model_path, num_input_channels=len(forecast_columns)
-    )
+    model = model_class.from_pretrained(model_path, num_input_channels=len(forecast_columns))
     context_length = model.config.context_length
     periodicity = kwargs["periodicity"]
     channel = kwargs["channel"]