From ee1bd6fddfe56ea8ed7985d62d63ea658303334f Mon Sep 17 00:00:00 2001
From: lssb
Date: Wed, 2 Aug 2023 15:59:36 +0800
Subject: [PATCH] Add files via upload

---
 01.PBMC_bulk.py                    | 127 +++++++++++++++++++++++
 02.Placenta.4p.py                  | 132 ++++++++++++++++++++++++
 03.Placenta.4p_and_PBMC.py         | 144 ++++++++++++++++++++++++++
 04.Placenta.4p_one_by_one.py       | 157 +++++++++++++++++++++++++++++
 05.PBMC_bulk_alone.py              | 132 ++++++++++++++++++++++++
 06.PBMC_celltype.py                | 134 ++++++++++++++++++++++++
 07.PBMC_celltype_one_by_one.py     | 155 ++++++++++++++++++++++++++++
 08.placenta_celltype.py            | 134 ++++++++++++++++++++++++
 09.placenta_celltype_one_by_one.py | 156 ++++++++++++++++++++++++++++
 10.placenta_bulk.py                | 127 +++++++++++++++++++++++
 11.placenta_bulk_263.py            | 127 +++++++++++++++++++++++
 11 files changed, 1525 insertions(+)
 create mode 100644 01.PBMC_bulk.py
 create mode 100644 02.Placenta.4p.py
 create mode 100644 03.Placenta.4p_and_PBMC.py
 create mode 100644 04.Placenta.4p_one_by_one.py
 create mode 100644 05.PBMC_bulk_alone.py
 create mode 100644 06.PBMC_celltype.py
 create mode 100644 07.PBMC_celltype_one_by_one.py
 create mode 100644 08.placenta_celltype.py
 create mode 100644 09.placenta_celltype_one_by_one.py
 create mode 100644 10.placenta_bulk.py
 create mode 100644 11.placenta_bulk_263.py

diff --git a/01.PBMC_bulk.py b/01.PBMC_bulk.py
new file mode 100644
index 0000000..223fe83
--- /dev/null
+++ b/01.PBMC_bulk.py
@@ -0,0 +1,127 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    # columns 0-262 are features, column 263 is the TPM target
+    return scale_x(raw_data[..., :263]), raw_data[..., 263]
+
+
+data_dir = "../01.train_data/PBMC_bulk/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 245472
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/res_PBMC_bulk/"
+os.makedirs(res_dir, exist_ok=True)  # the other scripts create res_dir; do the same here
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))
+plt.savefig("123.pdf")
+plt.savefig("123.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(test_y)
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
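
All eleven scripts share the scale_x recipe above: log-transform the non-zero entries, then standardize, then min-max scale, each step column-wise. A minimal sketch of the same recipe on a toy matrix (the toy array and the standalone function name are illustrative, not part of the patch):

import numpy as np
from sklearn import preprocessing

def scale_columns(x):
    # same order as scale_x above: log on non-zeros, standardize, min-max
    x = x.copy()
    x[x != 0] = np.log(x[x != 0])
    x = preprocessing.StandardScaler().fit_transform(x)
    return preprocessing.MinMaxScaler().fit_transform(x)

toy = np.array([[0.0, 10.0],
                [1.0, 100.0],
                [2.0, 1000.0]], dtype=np.float32)
print(scale_columns(toy))  # each column ends up in [0, 1]; zeros skip the log step
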
diff --git a/02.Placenta.4p.py b/02.Placenta.4p.py
new file mode 100644
index 0000000..5b92600
--- /dev/null
+++ b/02.Placenta.4p.py
@@ -0,0 +1,132 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+os.chdir(r"E:\Documents\bio\10.nucleosome\01.scripts\09.train_scripts")
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    # columns 0-262 as features, the three placenta-position targets in columns 264-266
+    return scale_x(raw_data[..., :263]), raw_data[..., 264:267]
+
+
+data_dir = "../01.train_data/4p/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    # model.add(tf.keras.layers.AlphaDropout(rate=0.5))
+    model.add(tf.keras.layers.Dense(3))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 769031
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/res_placenta_4p2/"
+os.makedirs(res_dir, exist_ok=True)  # makedirs also creates the missing "res/" parent
+
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame(example_result).to_csv(res_dir + "03.res.csv")
+pd.DataFrame(test_y).to_csv(res_dir + "04.real_y.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))  # scatter ravels both sides
+plt.savefig("res_placenta_4p.pdf")
+plt.savefig("res_placenta_4p.tiff")
+
+# flatten the (n, 3) targets so pd.Series accepts them and they align
+# with the flattened predictions
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(test_y.reshape(-1))
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
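
Every script repeats the same five-line seeding preamble at the top of model_with_seed. If these scripts ever share a module, it could be factored out; a sketch using only the calls already present above (the helper name set_global_seed is an assumption, not part of the patch):

import os
import random

import numpy as np
import tensorflow as tf

def set_global_seed(seed: int) -> None:
    random.seed(seed)                         # seed Python's RNG
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)                      # seed NumPy's RNG
    tf.compat.v1.set_random_seed(seed)        # fix the TF (CPU) seed
    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
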
diff --git a/03.Placenta.4p_and_PBMC.py b/03.Placenta.4p_and_PBMC.py
new file mode 100644
index 0000000..c63bc15
--- /dev/null
+++ b/03.Placenta.4p_and_PBMC.py
@@ -0,0 +1,144 @@
+# This variant adds the PBMC signal (column 263) as an input feature.
+
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+os.chdir(r"E:\Documents\bio\10.nucleosome\01.scripts\09.train_scripts")
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    # (unused in this variant; the features are fed in raw)
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    # columns 0-263 (PBMC included) as features; the three placenta-position
+    # targets in columns 264-266 match the Dense(3) head below (the original
+    # slice 264:268 only worked because NumPy clamps past the last column)
+    return raw_data[..., :264], raw_data[..., 264:267]
+
+
+data_dir = "../01.train_data/4p/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    # model.add(tf.keras.layers.AlphaDropout(rate=0.5))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(3))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 588228
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/res_placenta_4p_and_PBMC/"
+os.makedirs(res_dir, exist_ok=True)
+
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame(example_result).to_csv(res_dir + "03.res.csv")
+pd.DataFrame(test_y).to_csv(res_dir + "04.real_y.csv")
+
+# plt.scatter(test_y, tf.reshape(example_result, [-1]))
+# plt.savefig("res_placenta_4p.pdf")
+# plt.savefig("res_placenta_4p.tiff")
+
+# y_pre = pd.Series(tf.reshape(example_result, [-1]))
+# y = pd.Series(test_y)
+# print(y.corr(y_pre))
+# print(spearmanr(y_pre, y))
+# print(pearsonr(y_pre, y))
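
The original target slice in 03 (264:268, four columns) only trained at all because NumPy clamps slices past the last column, while the head stayed at Dense(3). A small guard would surface such a mismatch before training instead of letting it pass silently; check_target_width is a hypothetical helper, not part of the patch, and assumes the model has already been built (e.g. after a first call or fit):

import numpy as np

def check_target_width(model, y: np.ndarray) -> None:
    # the final Dense width must equal the number of target columns
    n_targets = y.shape[1] if y.ndim > 1 else 1
    n_outputs = model.output_shape[-1]
    if n_outputs != n_targets:
        raise ValueError(f"model emits {n_outputs} values, but y has {n_targets} columns")
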
diff --git a/04.Placenta.4p_one_by_one.py b/04.Placenta.4p_one_by_one.py
new file mode 100644
index 0000000..c80cc75
--- /dev/null
+++ b/04.Placenta.4p_one_by_one.py
@@ -0,0 +1,157 @@
+# Predict each placenta position separately, with PBMC added as an input feature.
+
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path, col_index):
+    # col_index = 264, 265 or 266: the placenta-position target to predict
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    return raw_data[..., :264], raw_data[..., col_index]
+
+
+def train_one_position(position_index):
+    data_dir = "../01.train_data/4p/"
+    train_x, train_y = load_data(data_dir + "train.csv", position_index)
+    valid_x, valid_y = load_data(data_dir + "valid.csv", position_index)
+    test_x, test_y = load_data(data_dir + "test.csv", position_index)
+
+    def model_with_seed(seed):
+        random.seed(seed)  # seed Python's RNG
+        os.environ['PYTHONHASHSEED'] = str(seed)
+        np.random.seed(seed)  # seed NumPy's RNG
+        tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+        os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+        # 263 -> TPM
+        model = tf.keras.Sequential()
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+        for _ in range(4):
+            model.add(tf.keras.layers.BatchNormalization())
+            model.add(tf.keras.layers.Dense(32, activation="relu"))
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(1))
+
+        model.compile(loss="mse", optimizer="adam")
+
+        stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
+
+        y_pre_batch = []
+
+        class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+            # record the flattened test-set predictions after every epoch
+            def on_epoch_end(self, epoch, logs=None):
+                example_result = self.model.predict(test_x)
+                y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+        history = model.fit(x=train_x,
+                            y=train_y,
+                            batch_size=512,
+                            validation_data=(valid_x, valid_y),
+                            epochs=40,
+                            callbacks=[stop_early, LogPredictPerEpoch()])
+
+        loss = history.history['loss']
+        val_loss = history.history['val_loss']
+
+        example_batch = test_x
+        example_result = model.predict(example_batch)
+
+        return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+    seed = 946063
+    model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+    res_dir = "res/04.Placenta.4p_one_by_one.py/" + str(position_index) + "/"
+    os.makedirs(res_dir, exist_ok=True)
+
+    np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+    pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+    pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+    # plt.scatter(test_y, tf.reshape(example_result, [-1]))
+    # plt.savefig("123.pdf")
+    # plt.savefig("123.tiff")
+
+    # y_pre = pd.Series(tf.reshape(example_result, [-1]))
+    # y = pd.Series(test_y)
+    # print(y.corr(y_pre))
+    # print(spearmanr(y_pre, y))
+    # print(pearsonr(y_pre, y))
+    return seed
+
+
+res_seed = []
+for i in [264, 265, 266]:
+    res_seed.append(train_one_position(i))
+
+print(res_seed)
+
+# [742271, 987885, 189512]
+# [762307, 430117, 144375]
\ No newline at end of file
diff --git a/05.PBMC_bulk_alone.py b/05.PBMC_bulk_alone.py
new file mode 100644
index 0000000..8211e49
--- /dev/null
+++ b/05.PBMC_bulk_alone.py
@@ -0,0 +1,132 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    return scale_x(raw_data[..., :263]), raw_data[..., 263]
+
+
+data_dir = "../01.train_data/individual/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 946063
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/individual/"
+os.makedirs(res_dir, exist_ok=True)
+
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))
+plt.savefig("123.pdf")
+plt.savefig("123.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(test_y)
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
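
Each script defines its per-epoch prediction logger inline, closing over test_x and a module-level y_pre_batch list. A self-contained equivalent that keeps its own state is sketched below; the class name and constructor are illustrative, not part of the patch:

import tensorflow as tf

class LogTestPredictions(tf.keras.callbacks.Callback):
    """Record flattened test-set predictions after every epoch."""

    def __init__(self, test_x):
        super().__init__()
        self.test_x = test_x
        self.per_epoch = []  # one flat prediction vector per epoch

    def on_epoch_end(self, epoch, logs=None):
        pred = self.model.predict(self.test_x, verbose=0)
        self.per_epoch.append(pred.reshape(-1))

With this, model.fit(..., callbacks=[stop_early, LogTestPredictions(test_x)]) would replace the shared y_pre_batch list, and the history would live on the callback instance.
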
diff --git a/06.PBMC_celltype.py b/06.PBMC_celltype.py
new file mode 100644
index 0000000..2ffaab4
--- /dev/null
+++ b/06.PBMC_celltype.py
@@ -0,0 +1,134 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+os.chdir(r"E:\Documents\bio\10.nucleosome\01.scripts\09.train_scripts")
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    # (unused in this variant; the features are fed in raw)
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    # five PBMC cell-type targets (columns 263-267), matching Dense(5) below
+    # (the original slice 263:269 only worked because NumPy clamps past the
+    # last column)
+    return raw_data[..., :263], raw_data[..., 263:268]
+
+
+data_dir = "../01.train_data/PBMC_CellType200/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    # model.add(tf.keras.layers.AlphaDropout(rate=0.5))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(5))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=80,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 769031
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/PBMC_CellType200/"
+os.makedirs(res_dir, exist_ok=True)
+
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame(example_result).to_csv(res_dir + "03.res.csv")
+pd.DataFrame(test_y).to_csv(res_dir + "04.real_y.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))  # scatter ravels both sides
+plt.savefig("res_placenta_4p.pdf")
+plt.savefig("res_placenta_4p.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(tf.reshape(test_y, [-1]))
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
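
06 prints a single Pearson/Spearman over all five cell types flattened together, which folds scale differences between cell types into one number. If per-cell-type figures are ever wanted, a sketch (the function name is illustrative, not part of the patch):

import numpy as np
from scipy.stats import pearsonr, spearmanr

def per_column_correlations(real_y: np.ndarray, pred_y: np.ndarray):
    # one (pearson, spearman) pair per output column, instead of
    # a single value over the flattened matrix
    return [(pearsonr(real_y[:, j], pred_y[:, j])[0],
             spearmanr(real_y[:, j], pred_y[:, j])[0])
            for j in range(real_y.shape[1])]
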
"res/PBMC_CellType200/" +if not os.path.exists(res_dir): + os.makedirs(res_dir) + +np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",") +pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv") +pd.DataFrame(example_result).to_csv(res_dir + "03.res.csv") +pd.DataFrame(test_y).to_csv(res_dir + "04.real_y.csv") + + + +plt.scatter(test_y,tf.reshape(example_result, [-1])) +plt.savefig("res_placenta_4p.pdf") +plt.savefig("res_placenta_4p.tiff") + + +y_pre = pd.Series(tf.reshape(example_result, [-1])) +test_y2 = pd.Series(tf.reshape(test_y,[-1])) +y = pd.Series(test_y2) + +print(y.corr(y_pre)) +print(spearmanr(y_pre, y)) +print(pearsonr(y_pre, y)) diff --git a/07.PBMC_celltype_one_by_one.py b/07.PBMC_celltype_one_by_one.py new file mode 100644 index 0000000..6aa373f --- /dev/null +++ b/07.PBMC_celltype_one_by_one.py @@ -0,0 +1,155 @@ + +# 这个是把胎盘四个部位的每个部位分别预测,,并加上了PBMC作为参数 +# 这个是把胎盘四个部位的每个部位分别预测,,并加上了PBMC作为参数 +# 这个是把胎盘四个部位的每个部位分别预测,,并加上了PBMC作为参数 +# 这个是把胎盘四个部位的每个部位分别预测,,并加上了PBMC作为参数 +# 这个是把胎盘四个部位的每个部位分别预测,,并加上了PBMC作为参数 + + +import tensorflow as tf +import utils +# import functools +import numpy as np +from sklearn import preprocessing +import keras_tuner as kt +from matplotlib import pyplot as plt +import pandas as pd +from scipy.stats import spearmanr, pearsonr +import os + +import random +seed=4 +seed=7 + + +def scale_x(x): + # 预处理 + # 先对非0特征取log + # 然后进行归一化 + min_max_scaler = preprocessing.MinMaxScaler() + standar_scaler = preprocessing.StandardScaler() + x[x!=0] = np.log(x[x!=0]) + x = standar_scaler.fit_transform(x) + x = min_max_scaler.fit_transform(x) + return x + +def load_data(path, col_index): + # col_index = 264 + # col_index = 265 + # col_index = 266 + raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1) + return (raw_data[..., :263]), (raw_data[..., col_index]) + + +def train_one_position(position_index): + + data_dir = "../01.train_data/PBMC_CellType200/" + train_x, train_y = load_data(data_dir + "train.csv", position_index) + valid_x, valid_y = load_data(data_dir + "valid.csv", position_index) + test_x, test_y = load_data(data_dir + "test.csv", position_index) + + + + + def model_with_seed(seed): + random.seed(seed)# 为python设置随机种子 + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed)# 为numpy设置随机种子 + tf.compat.v1.set_random_seed(seed)# tf cpu fix seed + os.environ['TF_DETERMINISTIC_OPS'] = '1' # tf gpu fix seed + + + # 263 -> TPM + + + + model = tf.keras.Sequential() + + model.add(tf.keras.layers.Dense(32, activation="relu")) + + for _ in range(4): + model.add(tf.keras.layers.BatchNormalization()) + model.add(tf.keras.layers.Dense(32, activation="relu" + ) + ) + + model.add(tf.keras.layers.BatchNormalization()) + model.add(tf.keras.layers.Dense(1)) + + model.compile( + loss = "mse", + optimizer = "adam" + ) + + + + stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5) + + y_pre_batch = [] + class logPredictPer100epoch(tf.keras.callbacks.Callback): + def on_epoch_end(self, batch, logs=None): + example_result = self.model.predict(test_x) + y_pre_batch.append(tf.reshape(example_result,[-1])) + + + history = model.fit(x=train_x, + y=train_y, + batch_size=512, + validation_data=(valid_x, valid_y), + epochs=80, + callbacks=[stop_early, logPredictPer100epoch()] + ) + + loss = history.history['loss'] + val_loss = history.history['val_loss'] + + example_batch = test_x + example_result = model.predict(example_batch) + + return (model, history, loss, val_loss, example_batch, 
diff --git a/08.placenta_celltype.py b/08.placenta_celltype.py
new file mode 100644
index 0000000..63c0395
--- /dev/null
+++ b/08.placenta_celltype.py
@@ -0,0 +1,134 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+os.chdir(r"E:\Documents\bio\10.nucleosome\01.scripts\09.train_scripts")
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    # (unused in this variant; the features are fed in raw)
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    # eight placenta cell-type targets (columns 264-271), matching Dense(8)
+    # below (the original slice 264:274 only worked because NumPy clamps
+    # past the last column)
+    return raw_data[..., :264], raw_data[..., 264:272]
+
+
+data_dir = "../01.train_data/placenta_CellType200/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    # model.add(tf.keras.layers.AlphaDropout(rate=0.5))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(8))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=160,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 769031
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/placenta_CellType200/"
+os.makedirs(res_dir, exist_ok=True)
+
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame(example_result).to_csv(res_dir + "03.res.csv")
+pd.DataFrame(test_y).to_csv(res_dir + "04.real_y.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))  # scatter ravels both sides
+plt.savefig("res_placenta_4p.pdf")
+plt.savefig("res_placenta_4p.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(tf.reshape(test_y, [-1]))
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
diff --git a/09.placenta_celltype_one_by_one.py b/09.placenta_celltype_one_by_one.py
new file mode 100644
index 0000000..fe5e443
--- /dev/null
+++ b/09.placenta_celltype_one_by_one.py
@@ -0,0 +1,156 @@
+# Predict each placenta cell type separately.
+
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path, col_index):
+    # col_index = 264 .. 271: the cell-type target to predict
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    return raw_data[..., :263], raw_data[..., col_index]
+
+
+def train_one_position(position_index):
+    data_dir = "../01.train_data/placenta_CellType200/"
+    train_x, train_y = load_data(data_dir + "train.csv", position_index)
+    valid_x, valid_y = load_data(data_dir + "valid.csv", position_index)
+    test_x, test_y = load_data(data_dir + "test.csv", position_index)
+
+    def model_with_seed(seed):
+        random.seed(seed)  # seed Python's RNG
+        os.environ['PYTHONHASHSEED'] = str(seed)
+        np.random.seed(seed)  # seed NumPy's RNG
+        tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+        os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+        # 263 -> TPM
+        model = tf.keras.Sequential()
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+        for _ in range(4):
+            model.add(tf.keras.layers.BatchNormalization())
+            model.add(tf.keras.layers.Dense(32, activation="relu"))
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(1))
+
+        model.compile(loss="mse", optimizer="adam")
+
+        stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+        y_pre_batch = []
+
+        class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+            # record the flattened test-set predictions after every epoch
+            def on_epoch_end(self, epoch, logs=None):
+                example_result = self.model.predict(test_x)
+                y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+        history = model.fit(x=train_x,
+                            y=train_y,
+                            batch_size=512,
+                            validation_data=(valid_x, valid_y),
+                            epochs=80,
+                            callbacks=[stop_early, LogPredictPerEpoch()])
+
+        loss = history.history['loss']
+        val_loss = history.history['val_loss']
+
+        example_batch = test_x
+        example_result = model.predict(example_batch)
+
+        return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+    seed = 946063
+    model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+    res_dir = "res/placenta_CellType200_one_by_one/" + str(position_index) + "/"
+    os.makedirs(res_dir, exist_ok=True)
+
+    np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+    pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+    pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+    # plt.scatter(test_y, tf.reshape(example_result, [-1]))
+    # plt.savefig("123.pdf")
+    # plt.savefig("123.tiff")
+
+    # y_pre = pd.Series(tf.reshape(example_result, [-1]))
+    # y = pd.Series(test_y)
+    # print(y.corr(y_pre))
+    # print(spearmanr(y_pre, y))
+    # print(pearsonr(y_pre, y))
+    return seed
+
+
+res_seed = []
+for i in [264, 265, 266, 267, 268, 269, 270, 271]:
+    res_seed.append(train_one_position(i))
+
+print(res_seed)
+
+# [839239, 507864, 729255, 654388, 36794, 251, 466391, 594927]
\ No newline at end of file

diff --git a/10.placenta_bulk.py b/10.placenta_bulk.py
new file mode 100644
index 0000000..4787b34
--- /dev/null
+++ b/10.placenta_bulk.py
@@ -0,0 +1,127 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: standardize and min-max scale, column-wise
+    # (the log transform used in the other scripts is disabled here)
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    # x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    return scale_x(raw_data[..., :264]), raw_data[..., 264]
+
+
+data_dir = "../01.train_data/placenta_bulk/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 245472
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/res_placenta_bulk/"
+os.makedirs(res_dir, exist_ok=True)  # the other scripts create res_dir; do the same here
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))
+plt.savefig("123.pdf")
+plt.savefig("123.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(test_y)
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))
diff --git a/11.placenta_bulk_263.py b/11.placenta_bulk_263.py
new file mode 100644
index 0000000..7c068e0
--- /dev/null
+++ b/11.placenta_bulk_263.py
@@ -0,0 +1,127 @@
+import os
+import random
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr, pearsonr
+from sklearn import preprocessing
+
+seed = 7
+
+
+def scale_x(x):
+    # Preprocessing: log-transform the non-zero features first,
+    # then standardize and min-max scale, column-wise
+    min_max_scaler = preprocessing.MinMaxScaler()
+    standard_scaler = preprocessing.StandardScaler()
+    x[x != 0] = np.log(x[x != 0])
+    x = standard_scaler.fit_transform(x)
+    x = min_max_scaler.fit_transform(x)
+    return x
+
+
+def load_data(path):
+    raw_data = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1)
+    return scale_x(raw_data[..., :263]), raw_data[..., 263]
+
+
+data_dir = "../01.train_data/placenta_bulk/"
+train_x, train_y = load_data(data_dir + "train.csv")
+valid_x, valid_y = load_data(data_dir + "valid.csv")
+test_x, test_y = load_data(data_dir + "test.csv")
+
+
+def model_with_seed(seed):
+    random.seed(seed)  # seed Python's RNG
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)  # seed NumPy's RNG
+    tf.compat.v1.set_random_seed(seed)  # fix the TF (CPU) seed
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # make TF GPU ops deterministic
+
+    # 263 -> TPM
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(32, activation="relu"))
+    for _ in range(4):
+        model.add(tf.keras.layers.BatchNormalization())
+        model.add(tf.keras.layers.Dense(32, activation="relu"))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss="mse", optimizer="adam")
+
+    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
+
+    y_pre_batch = []
+
+    class LogPredictPerEpoch(tf.keras.callbacks.Callback):
+        # record the flattened test-set predictions after every epoch
+        def on_epoch_end(self, epoch, logs=None):
+            example_result = self.model.predict(test_x)
+            y_pre_batch.append(tf.reshape(example_result, [-1]))
+
+    history = model.fit(x=train_x,
+                        y=train_y,
+                        batch_size=512,
+                        validation_data=(valid_x, valid_y),
+                        epochs=40,
+                        callbacks=[stop_early, LogPredictPerEpoch()])
+
+    loss = history.history['loss']
+    val_loss = history.history['val_loss']
+
+    example_batch = test_x
+    example_result = model.predict(example_batch)
+
+    return (model, history, loss, val_loss, example_batch, example_result, y_pre_batch)
+
+
+seed = 50557
+model, history, loss, val_loss, example_batch, example_result, y_pre_batch = model_with_seed(seed)
+
+res_dir = "res/res_placenta_bulk_263/"
+os.makedirs(res_dir, exist_ok=True)  # the other scripts create res_dir; do the same here
+np.savetxt(res_dir + "01.y_pre_history.csv", y_pre_batch, delimiter=",")
+pd.DataFrame({"loss": loss, "val_loss": val_loss}).to_csv(res_dir + "02.loss_history.csv")
+pd.DataFrame({"real_y": test_y, "pred_y": tf.reshape(example_result, [-1])}).to_csv(res_dir + "03.res.csv")
+
+plt.scatter(test_y, tf.reshape(example_result, [-1]))
+plt.savefig("123.pdf")
+plt.savefig("123.tiff")
+
+y_pre = pd.Series(tf.reshape(example_result, [-1]))
+y = pd.Series(test_y)
+
+print(y.corr(y_pre))
+print(spearmanr(y_pre, y))
+print(pearsonr(y_pre, y))