From e22ac5df6f63f7362fc6253549b36cb6545b37cc Mon Sep 17 00:00:00 2001 From: KOSASIH Date: Mon, 15 Jul 2024 09:48:44 +0700 Subject: [PATCH] Create data_utils.py --- utils/data_utils.py | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 utils/data_utils.py diff --git a/utils/data_utils.py b/utils/data_utils.py new file mode 100644 index 0000000..1c2a6e3 --- /dev/null +++ b/utils/data_utils.py @@ -0,0 +1,48 @@ +import pandas as pd +from sklearn.preprocessing import StandardScaler + +def load_nav_data(file_path): + """ + Load navigation data from a CSV file. + + Args: + file_path (str): The path to the CSV file. + + Returns: + pandas DataFrame: The loaded navigation data. + """ + data = pd.read_csv(file_path) + return data + +def preprocess_nav_data(data): + """ + Preprocess navigation data by scaling and normalizing the features. + + Args: + data (pandas DataFrame): The navigation data to preprocess. + + Returns: + pandas DataFrame: The preprocessed navigation data. + """ + scaler = StandardScaler() + data_scaled = scaler.fit_transform(data.drop("target", axis=1)) + data_preprocessed = pd.DataFrame(data_scaled, columns=data.columns[:-1]) + data_preprocessed["target"] = data["target"] + return data_preprocessed + +def generate_synthetic_data(num_samples, num_features): + """ + Generate synthetic data using a Gaussian mixture model. + + Args: + num_samples (int): The number of samples to generate. + num_features (int): The number of features to generate. + + Returns: + pandas DataFrame: The generated synthetic data. + """ + from sklearn.mixture import GaussianMixture + gmm = GaussianMixture(n_components=3, covariance_type="full") + data = gmm.sample(num_samples)[0] + data = pd.DataFrame(data, columns=[f"feature_{i}" for i in range(num_features)]) + return data