diff --git a/ddmc/figures/figureM4.py b/ddmc/figures/figureM4.py index 52fa98b9..a2575e41 100644 --- a/ddmc/figures/figureM4.py +++ b/ddmc/figures/figureM4.py @@ -58,7 +58,7 @@ def do_phenotype_regression(n_runs=3, n_components=35, n_cv_folds=3): hot_cold = cptac.get_hot_cold_labels() lr = LogisticRegressionCV( - cv=3, + cv=n_cv_folds, solver="saga", max_iter=10000, n_jobs=-1, diff --git a/ddmc/figures/figureM7.py b/ddmc/figures/figureM7.py index ca2a7ff9..cb593a13 100644 --- a/ddmc/figures/figureM7.py +++ b/ddmc/figures/figureM7.py @@ -15,40 +15,35 @@ plot_roc, plot_cluster_regression_coefficients, normalize_cluster_centers, + get_highest_weighted_clusters, ) def makeFigure(): - """Get a list of the axis objects and create a figure""" - # Get list of axis objects axes, f = getSetup((11, 7), (2, 3), multz={0: 1, 4: 1}) cptac = CPTAC() is_hot = cptac.get_hot_cold_labels() p_signal = cptac.get_p_signal() model = DDMC(n_components=30, seq_weight=100, random_state=5).fit(p_signal) - assert ( - not model.has_empty_clusters() - ), "This plot assumes that every cluster will have at least one peptide. Please rerun with fewer components are more peptides." centers = model.transform(as_df=True).loc[is_hot.index] + centers.iloc[:, :] = normalize_cluster_centers(centers.values) plot_p_signal_across_clusters_and_binary_feature(is_hot, centers, "is_hot", axes[0]) - centers.iloc[:, :] = normalize_cluster_centers(centers.values) + n_cv = 15 lr = LogisticRegressionCV( - cv=3, solver="saga", n_jobs=1, penalty="l1", max_iter=10000 + cv=n_cv, solver="saga", n_jobs=1, penalty="l1", max_iter=10000 ) plot_roc( - lr, centers.values, is_hot.values, cv_folds=3, title="ROC TI", return_mAUC=True + lr, centers.values, is_hot.values, cv_folds=n_cv, title="ROC TI", return_mAUC=True ) plot_cluster_regression_coefficients(axes[1], lr, title="") - top_clusters = np.argsort(np.abs(lr.coef_.squeeze()))[-3:] + top_clusters = get_highest_weighted_clusters(model, lr.coef_) - # plot predicted kinases for most weighted clusters distances = model.predict_upstream_kinases()[top_clusters] - # plot upstream Kinases plot_cluster_kinase_distances( distances, model.get_pssms(clusters=top_clusters), axes[2], num_hits=2 )