def makeFigure():
    """Get a list of the axis objects and create a figure.

    Loads the stored AnnData object, derives one
    ``HistologicGradeSimple`` label per condition, and draws the condition,
    eigenstate and gene factor plots on the first three axes returned by
    ``getSetup``.  The remaining axis is not drawn on here.

    Returns:
        The figure object produced by ``getSetup``.
    """
    ax, f = getSetup((6, 6), (2, 2))
    subplotLabel(ax)

    X = anndata.read_h5ad("/opt/extra-storage/CRC/GSE178341/crc10x_full_50cmp.h5ad")

    samples_names = sample_names_only(X, "HistologicGradeSimple")

    plot_condition_factors(
        X,
        ax[0],
        condition_label="PID",
        cond_group_labels=pd.Series(samples_names),
        groupConditions=True,
    )
    # Tick labels are blanked on the condition and gene factor panels.
    ax[0].yaxis.set_ticklabels([])
    plot_eigenstate_factors(X, ax[1])
    plot_gene_factors(X, ax[2])
    ax[2].yaxis.set_ticklabels([])

    return f


def sample_names_only(X: "anndata.AnnData", label: str):
    """Return one ``label`` value per unique condition index.

    Rows of ``X.obs`` are grouped by their ``"condition_unique_idxs"`` value,
    and the groups are visited in the sorted order produced by ``np.unique``.
    For each group the smallest non-missing ``label`` value is selected.

    Args:
        X: Object whose ``obs`` table holds a ``"condition_unique_idxs"``
            column and a ``label`` column.
        label: Name of the ``obs`` column to summarise.

    Returns:
        A list with one entry per unique condition index:

        * the selected value itself when the group has no missing labels;
        * ``"<value>-NaN"`` when the group mixes present and missing labels;
        * ``"NaN"`` when every label in the group is missing.
    """
    obs = X.obs
    # Pull both columns out once so the loop only does array masking.
    condition_idxs = obs["condition_unique_idxs"].to_numpy()
    label_values = obs[label].to_numpy()

    label_samples = []
    # Iterate over the index values that actually occur rather than assuming
    # they are exactly 0..n-1; gaps or offsets would otherwise select empty
    # groups.
    for cond in np.unique(condition_idxs):
        group = label_values[condition_idxs == cond]
        missing = pd.isna(group)
        # Missing entries are removed before np.unique because NaN cannot be
        # ordered against string labels.
        present = np.unique(group[~missing])

        if present.size == 0:
            # Nothing but missing labels for this condition.
            label_samples.append("NaN")
        elif missing.any():
            label_samples.append(f"{present[0]}-NaN")
        else:
            label_samples.append(present[0])

    return label_samples