-
Notifications
You must be signed in to change notification settings - Fork 57
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* make comparing within model figure smaller * update tedious arrows for pointing at coefficient plot * add segmented copy number data to initialize logic * scripts to process and then plot copy burden data
- Loading branch information
Showing
6 changed files
with
196 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Gregory Way 2017 | ||
# PanCancer Classifier | ||
# scripts/copy_burden_figures.R | ||
# | ||
# Generate figures for visualizing copy burden across different samples | ||
# stratified by TP53 mutation status | ||
# | ||
# Usage: Run in command line | ||
# | ||
# Rscript --vanilla scripts/copy_burden_figures.R | ||
# | ||
# Output: | ||
# Two figures summarizing copy burden across TCGA Pan Can samples | ||
|
||
library(ggplot2) | ||
|
||
# Input and output locations (relative to the working directory)
base_file <- file.path("classifiers", "TP53")
burden_file <- file.path(base_file, "tables", "copy_burden_predictions.tsv")
snaptron_file <- file.path(
  "scripts", "snaptron", "junctions_with_mutations.csv"
)

figure_dir <- file.path(base_file, "figures")
frac_alt_plot <- file.path(figure_dir, "fraction_altered_plot.pdf")
violin_plot <- file.path(figure_dir, "seg_altered_violin_plot.pdf")

# Read the copy burden predictions table
copy_burden <- readr::read_tsv(burden_file)

# Read the junction table, discard its first column, and collapse rows that
# are exact repeats of an earlier row
junc_df <- unique(readr::read_csv(snaptron_file)[-1])
|
||
# Location of the silent mutation and truncation
#
# Every condition uses `%in%` rather than `==`: `%in%` never returns NA, so
# rows with a missing value in a filtered column are dropped instead of being
# turned into all-NA rows by the subsetting.

# Junction records starting at position 7675237
junc_exon_df <- junc_df[junc_df$start %in% "7675237", ]

# Of those, keep silent variants on snaptron junction 13945701 in records
# with TP53 equal to 0 and include equal to 1
is_silent_junc <- junc_exon_df$Variant_Classification %in% "Silent" &
  junc_exon_df$snaptron_id %in% "13945701" &
  junc_exon_df$TP53 %in% 0 &
  junc_exon_df$include %in% 1
silent_junc <- junc_exon_df[is_silent_junc, ]
|
||
# Scatter plot of CNV burden (fraction altered) against the classifier
# weight, colored by TP53 status.
#
# The plot is stored and passed to ggsave() explicitly so the saved figure
# does not depend on last_plot(), and so the plot is not auto-printed to the
# default graphics device (which leaves a stray Rplots.pdf under Rscript).
frac_alt_gg <- ggplot(
  copy_burden,
  aes(x = weight, y = frac_altered, color = factor(TP53))
) +
  geom_point(alpha = 0.6, size = 0.3) +
  theme_bw() +
  xlab("TP53 Inactivation Probability") +
  ylab("CNV Burden (Fraction Altered)") +
  labs(color = "TP53 Status")

ggsave(frac_alt_plot, plot = frac_alt_gg, width = 5, height = 4, dpi = 600)
|
||
# Build and Process Copy Burden DataFrame
#
# The copy burden table is stacked several times, once per group shown in the
# violin plot, so the same sample can appear in more than one group.

# Flag samples whose id appears among the silent mutation junction records
copy_burden$silent <- 0
copy_burden$silent[copy_burden$Sample %in% silent_junc$tcga_id] <- 1

# Replace the TP53 column with the observed status label. The labels are
# built in a character vector first: assigning strings into the existing
# TP53 column through `[<-` fails on tibbles that refuse to change a column's
# type, and `%in%` keeps missing `total_status` values out of the row index.
tp53_label <- as.character(copy_burden$TP53)
tp53_label[copy_burden$total_status %in% 0] <- "Wild-Type"
tp53_label[copy_burden$total_status %in% 1] <- "TP53 Loss of Function"
copy_burden$TP53 <- tp53_label

# Return the rows of `copy_burden` selected by the logical vector `keep`,
# reduced to the two plotting columns, with TP53 overwritten by `label`.
# which() discards rows whose condition evaluates to NA.
label_group <- function(keep, label) {
  group_df <- copy_burden[which(keep), c("frac_altered", "TP53")]
  group_df$TP53 <- rep(label, nrow(group_df))
  group_df
}

# Observed status and classifier call (weights of at least 0.5 count as a
# predicted loss)
observed_loss <- copy_burden$total_status == 1
observed_wild_type <- copy_burden$total_status == 0
predicted_loss <- copy_burden$weight >= 0.5
predicted_wild_type <- copy_burden$weight < 0.5

plot_ready <- rbind(
  copy_burden[, c("frac_altered", "TP53")],
  label_group(copy_burden$silent == 1, "c.375G>T Mutation"),
  label_group(observed_loss & predicted_wild_type, "False Negative"),
  label_group(observed_wild_type & predicted_loss, "False Positive"),
  label_group(predicted_wild_type, "Predicted Wild-Type"),
  label_group(predicted_loss, "Predicted Loss")
)

# Fix the order in which the groups are drawn; labels outside this set
# become NA
plot_levels <- c(
  "c.375G>T Mutation", "False Positive",
  "Predicted Loss", "TP53 Loss of Function",
  "False Negative", "Predicted Wild-Type", "Wild-Type"
)

plot_ready$TP53 <- factor(plot_ready$TP53, levels = plot_levels)
|
||
# Build violin plots for copy number alterations comparison
#
# One horizontal violin with an inner boxplot per TP53 group, plus a dashed
# red reference line at a fraction altered of 0.5. The group axis text is
# blanked; the groups are identified by the fill legend, which is placed
# outside the panel in the widened right-hand plot margin.
#
# The plot is stored and passed to ggsave() explicitly instead of relying on
# last_plot(), and guides() uses "none" rather than the deprecated FALSE.
violin_gg <- ggplot(plot_ready, aes(x = TP53, y = frac_altered)) +
  geom_violin(
    aes(fill = TP53),
    size = 0.3, alpha = 0.3, adjust = 0.7, trim = TRUE
  ) +
  geom_boxplot(
    aes(fill = TP53),
    size = 0.3, width = 0.1, outlier.size = 0.3
  ) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
  coord_flip() +
  xlab("TP53 Status") +
  ylab("CNV Burden (Fraction Altered)") +
  labs(fill = "") +
  theme(
    legend.position = c(1.4, 0.7),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = rel(0.7)),
    axis.title = element_text(size = rel(0.7)),
    legend.text = element_text(size = rel(0.35)),
    legend.key = element_blank(),
    legend.key.size = unit(0.8, "lines"),
    legend.background = element_rect(fill = alpha("white", 0)),
    panel.grid.major = element_line(color = "white", size = 0.3),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_rect(fill = "white"),
    plot.margin = unit(c(0.1, 2.6, 0.2, 0.2), "cm"),
    panel.border = element_rect(fill = NA, size = 0.4)
  ) +
  guides(fill = guide_legend(reverse = TRUE, ncol = 1), color = "none")

ggsave(violin_plot, plot = violin_gg, height = 2.25, width = 2.5, dpi = 600)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
""" | ||
Gregory Way 2017 | ||
PanCancer Classifier | ||
scripts/copy_burden_merge.py | ||
Merge per sample classifier scores with segment based scores | ||
Usage: Run in command line with required command argument: | ||
python scripts/copy_burden_merge.py --classifier_folder | ||
classifier_folder is a string pointing to the location of the classifier data | ||
Output: | ||
.tsv file of classifier scores merged with segment based copy number scores | ||
""" | ||
|
||
import os | ||
import argparse | ||
import pandas as pd | ||
|
||
# The classifier folder is mandatory: every path below is derived from it, so
# argparse reports a usage error when it is missing instead of letting
# os.path.join fail on None.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--classifier_folder', required=True,
                    help='string of the location of classifier data')
args = parser.parse_args()

# Build input and output paths from the command arguments
pred_file = os.path.join(args.classifier_folder, 'classifier_decisions.tsv')
burden_file = os.path.join('data', 'seg_based_scores.tsv')
out_file = os.path.join(os.path.dirname(pred_file), 'tables',
                        'copy_burden_predictions.tsv')

# Load and process data
copy_burden_df = pd.read_table(burden_file)
classifier_df = pd.read_table(pred_file, index_col=0)

# Match each classifier row (keyed by its index) to the copy burden row with
# the same value in the 'Sample' column
combined_df = classifier_df.merge(copy_burden_df, left_index=True,
                                  right_on='Sample')

# Move 'Sample' into the index rather than copying it there, so the output
# header contains a single 'Sample' column instead of two
combined_df = combined_df.set_index('Sample')
combined_df.to_csv(out_file, sep='\t')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters