Merge pull request #118 from yhan8/YH_merge
Addressed Greg's additional comments; ready to merge to the master branch
yhan8 authored Sep 22, 2021
2 parents c695367 + a399e60 commit bc8841f
Showing 9 changed files with 17 additions and 105 deletions.
14 changes: 0 additions & 14 deletions 2.describe-data/1.merge-datasets-gct.ipynb
@@ -969,20 +969,6 @@
"output_gct_file = os.path.join(gct_dir, \"consensus_profiles.gct\")\n",
"write_gct(profiles=median_consensus_df, output_file=output_gct_file)"
]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
}
],
"metadata": {
4 changes: 3 additions & 1 deletion 3.resistance-signature/0.training-test-split.ipynb
@@ -46,7 +46,9 @@
"Yu Han, 2021\n",
"\n",
"https://github.com/broadinstitute/profiling-resistance-mechanisms/issues/116 \n",
-"We dropped the inference set (batch 3) because of overly confluent plates and suboptimal plate design. In the notebook, I still need to output the bortezomib signature analytical set, but I can also include the new batches of data, which will serve as a better experimentally designed inference set as included in pull request #114\n",
+"We dropped the inference set (batch 3) because of overly confluent plates and suboptimal plate design. \n",
+"\n",
+"In the notebook, I still need to output the bortezomib signature analytical set, but I can also include the new batches of data, which will serve as a better experimentally designed inference set as included in pull request #114\n",
"\n"
]
},
@@ -195,11 +195,7 @@
" max_permuted_value = readr::col_double()\n",
")\n",
"\n",
-"#inference_df <- readr::read_tsv(sig_results_file, col_types = sig_cols) %>%\n",
-" #dplyr::filter(Metadata_model_split == \"inference\")\n",
-"\n",
-"signature_df <- readr::read_tsv(sig_results_file, col_types = sig_cols) %>%\n",
-" #dplyr::filter(Metadata_model_split != \"inference\")\n",
+"signature_df <- readr::read_tsv(sig_results_file, col_types = sig_cols) \n",
"\n",
"print(dim(signature_df))\n",
"head(signature_df, 4)"
@@ -262,19 +258,12 @@
" shuffled = readr::col_character()\n",
")\n",
"\n",
-"roc_df <- readr::read_tsv(roc_auc_file, col_types=roc_cols) %>%\n",
+"roc_df <- readr::read_tsv(roc_auc_file, col_types=roc_cols) \n",
" dplyr::arrange(shuffled) %>%\n",
" dplyr::mutate(roc_auc = round(roc_auc, 3))\n",
"\n",
"roc_df <- roc_df[, c(\"shuffled\", \"roc_auc\")]\n",
-"colnames(roc_df) <- c(\"AUROC\\nShuffled:\", \"roc_auc\")\n",
-"\n",
-"#roc_df <- roc_df %>%\n",
-" #tidyr::spread(`AUROC\\nShuffled:`, roc_auc, sep=\"\") %>%\n",
-" #dplyr::arrange(desc(Split))\n",
-"\n",
-"#roc_df$Split <- dplyr::recode(roc_df$Split, !!!legend_labels)\n",
-"#roc_df"
+"colnames(roc_df) <- c(\"AUROC\\nShuffled:\", \"roc_auc\")\n"
]
},
{
@@ -302,15 +291,11 @@
"metadata": {},
"outputs": [],
"source": [
-"threshold_points_df <- roc_curve_df %>%\n",
-" dplyr::filter(shuffled == \"False\") %>%\n",
-" #dplyr::group_by(model_split) %>%\n",
-" dplyr::filter(abs(threshold) == min(abs(threshold))) %>%\n",
+"threshold_points_df <- roc_curve_df \n",
+" dplyr::filter(shuffled == \"False\") \n",
+" dplyr::filter(abs(threshold) == min(abs(threshold))) \n",
" dplyr::ungroup()\n",
"\n",
-"#legend_colors <- c(\"#D41159\")\n",
-"#legend_labels <- c(\"WT\" = \"WT\", \"BZ\" = \"BZ\")\n",
-"\n",
"roc_gg <- (\n",
" ggplot(roc_curve_df, aes(x = fpr, y = tpr))\n",
" + geom_line(aes(linetype = shuffled), size=0.2)\n",
@@ -357,10 +342,6 @@
" shape = 21,\n",
" lwd = 0.5\n",
" )\n",
-" #+ facet_wrap(\n",
-" # \"~Metadata_dataset\",\n",
-" # ncol = 1,\n",
-" # scales = \"free_y\")\n",
" \n",
" + xlab(\"\")\n",
" + ylab(\"Signature score\\n(singscore)\")\n",
7 changes: 0 additions & 7 deletions 3.resistance-signature/3.get-performance-metrics.ipynb
@@ -426,13 +426,6 @@
"output_file = pathlib.Path(f\"{output_dir}/{dataset}_roc_curve.tsv\")\n",
"roc_curve_data_df.to_csv(output_file, sep=\"\\t\", index=False)"
]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
}
],
"metadata": {
16 changes: 0 additions & 16 deletions 3.resistance-signature/8.compile-otherclone-dataset.ipynb
@@ -2230,22 +2230,6 @@
"output_file = pathlib.Path(f\"{output_dir}/otherclones_normalized_profiles.tsv.gz\")\n",
"full_df.to_csv(output_file, sep=\"\\t\", index=False)"
]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "66f2fed6",
-"metadata": {},
-"outputs": [],
-"source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "34488b99",
-"metadata": {},
-"outputs": [],
-"source": []
}
],
"metadata": {
@@ -42,7 +42,9 @@
# Yu Han, 2021
#
# https://github.com/broadinstitute/profiling-resistance-mechanisms/issues/116
-# We dropped the inference set (batch 3) because of overly confluent plates and suboptimal plate design. In the notebook, I still need to output the bortezomib signature analytical set, but I can also include the new batches of data, which will serve as a better experimentally designed inference set as included in pull request #114
+# We dropped the inference set (batch 3) because of overly confluent plates and suboptimal plate design.
+#
+# In the notebook, I still need to output the bortezomib signature analytical set, but I can also include the new batches of data, which will serve as a better experimentally designed inference set as included in pull request #114
#
#

@@ -38,11 +38,7 @@ sig_cols <- readr::cols(
max_permuted_value = readr::col_double()
)

-#inference_df <- readr::read_tsv(sig_results_file, col_types = sig_cols) %>%
-    #dplyr::filter(Metadata_model_split == "inference")
-
-signature_df <- readr::read_tsv(sig_results_file, col_types = sig_cols) %>%
-    #dplyr::filter(Metadata_model_split != "inference")
+signature_df <- readr::read_tsv(sig_results_file, col_types = sig_cols)

print(dim(signature_df))
head(signature_df, 4)
@@ -87,19 +83,13 @@ roc_cols <- readr::cols(
shuffled = readr::col_character()
)

-roc_df <- readr::read_tsv(roc_auc_file, col_types=roc_cols) %>%
+roc_df <- readr::read_tsv(roc_auc_file, col_types=roc_cols)
dplyr::arrange(shuffled) %>%
dplyr::mutate(roc_auc = round(roc_auc, 3))

roc_df <- roc_df[, c("shuffled", "roc_auc")]
colnames(roc_df) <- c("AUROC\nShuffled:", "roc_auc")

-#roc_df <- roc_df %>%
-    #tidyr::spread(`AUROC\nShuffled:`, roc_auc, sep="") %>%
-    #dplyr::arrange(desc(Split))
-
-#roc_df$Split <- dplyr::recode(roc_df$Split, !!!legend_labels)
-#roc_df

table_theme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.3)),
@@ -111,15 +101,11 @@ table_gg <- gridExtra::tableGrob(roc_df,
theme = table_theme,
rows = NULL)

-threshold_points_df <- roc_curve_df %>%
-    dplyr::filter(shuffled == "False") %>%
-    #dplyr::group_by(model_split) %>%
-    dplyr::filter(abs(threshold) == min(abs(threshold))) %>%
+threshold_points_df <- roc_curve_df
+    dplyr::filter(shuffled == "False")
+    dplyr::filter(abs(threshold) == min(abs(threshold)))
dplyr::ungroup()

-#legend_colors <- c("#D41159")
-#legend_labels <- c("WT" = "WT", "BZ" = "BZ")
-
roc_gg <- (
ggplot(roc_curve_df, aes(x = fpr, y = tpr))
+ geom_line(aes(linetype = shuffled), size=0.2)
@@ -150,10 +136,6 @@ box_plot_gg <- (
shape = 21,
lwd = 0.5
)
-    #+ facet_wrap(
-    #    "~Metadata_dataset",
-    #    ncol = 1,
-    #    scales = "free_y")

+ xlab("")
+ ylab("Signature score\n(singscore)")
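As the added lines read here, the trailing %>% is dropped but the dplyr verbs on the following lines are kept, so roc_df and threshold_points_df are no longer built by a single pipe. If the intent was only to strip the commented-out inference filter, grouping, and legend code, the piped form would look roughly like the sketch below. This is an illustrative sketch, not the committed code; it assumes roc_auc_file, roc_cols, and roc_curve_df are defined earlier in the script, as in the surrounding hunks.

# Sketch (assumption: only the commented-out steps were meant to go, not the pipes)
suppressPackageStartupMessages(library(dplyr))  # attaches %>%

roc_df <- readr::read_tsv(roc_auc_file, col_types = roc_cols) %>%
    dplyr::arrange(shuffled) %>%
    dplyr::mutate(roc_auc = round(roc_auc, 3))

threshold_points_df <- roc_curve_df %>%
    dplyr::filter(shuffled == "False") %>%
    dplyr::filter(abs(threshold) == min(abs(threshold))) %>%
    dplyr::ungroup()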
@@ -161,9 +161,3 @@
output_file = pathlib.Path(f"{output_dir}/{dataset}_roc_curve.tsv")
roc_curve_data_df.to_csv(output_file, sep="\t", index=False)


-# In[ ]:
-
-
-
-
@@ -138,15 +138,3 @@
output_file = pathlib.Path(f"{output_dir}/otherclones_normalized_profiles.tsv.gz")
full_df.to_csv(output_file, sep="\t", index=False)


-# In[ ]:
-
-
-
-
-
-# In[ ]:
-
-
-
-