Skip to content

Commit

Permalink
remove classification specific steps
Browse files Browse the repository at this point in the history
  • Loading branch information
wgifford committed Aug 7, 2024
1 parent f64ba77 commit beb7b9d
Showing 1 changed file with 30 additions and 26 deletions.
56 changes: 30 additions & 26 deletions tsfm_public/toolkit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,52 +1205,56 @@ def join_list_without_repeat(*lists: List[List[Any]]) -> List[Any]:
final_set = set(final)
return final

def convert_tsfile(filename: str) -> pd.DataFrame:
    """Converts a .ts file into a pandas dataframe.

    Returns the result in canonical multi-time series format: one output row per
    time step, with an ``id`` column identifying the case (the row position in the
    parsed file), an optional ``timestamp`` column, one ``value_<j>`` column per
    series column, and a ``target`` column.

    The last column of the parsed frame is treated as the target and its value is
    repeated for every time step of the case (presumably this is where
    ``convert_tsfile_to_dataframe(..., return_separate_X_and_y=False)`` places the
    label -- confirm against that helper).

    Args:
        filename (str): Input file name.

    Returns:
        pd.DataFrame: Converted time series. An empty dataframe is returned when
            the file contains no cases.
    """
    df = convert_tsfile_to_dataframe(filename, return_separate_X_and_y=False)
    rows, columns = df.shape

    dfs = []
    for i in range(rows):
        row = df.iloc[i]
        temp_df = pd.DataFrame()
        # Number of time steps in this case; taken from the first series column.
        # Initialised up front so the target assignment below never sees an
        # unbound name when there are no series columns.
        repeat = 0

        # Every column except the last one holds a nested series.
        for j in range(columns - 1):
            series_to_df = row.iloc[j].to_frame().reset_index()
            if j == 0:
                repeat = len(series_to_df)
                # Include a timestamp column only if the series is indexed by
                # timestamps (checked on the first index entry).
                if repeat > 0 and isinstance(series_to_df["index"].iloc[0], pd.Timestamp):
                    temp_df["timestamp"] = series_to_df["index"]
                temp_df["id"] = [i] * repeat
            temp_df[f"value_{j}"] = series_to_df[0]

        # The last column carries the target; broadcast it over all time steps.
        if columns > 0:
            target = row.iloc[columns - 1]
            temp_df["target"] = [target] * repeat

        dfs.append(temp_df)

    # pd.concat raises ValueError on an empty list, so handle "no cases" explicitly.
    final_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

    # NOTE: target post-processing (numeric coercion, mapping string labels to
    # integers, forcing 0-indexed class labels) is intentionally not done here;
    # it is to be moved to a preprocessor.

    return final_df

0 comments on commit beb7b9d

Please sign in to comment.