-
Notifications
You must be signed in to change notification settings - Fork 0
/
copy_number_correlates.qmd
146 lines (99 loc) · 8.29 KB
/
copy_number_correlates.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
---
title: "copy_number_correlates"
format: html
editor: visual
---
## LPS Copy Number / DNA methylation / Expression
```{r}
#| echo: false
library(dplyr)
library(ggplot2)
library(ggpubr)
dir.create("plots")
```
```{r}
load("data/t449_778_methylation_matrices.RData")
load("data/methylation_matrices.RData")
meth2 <- t449.matrix[,"coverage"]/100
cov2 <- t449.matrix[,"methylation"]
t449.matrix[,"coverage"] <- cov2
t449.matrix[,"methylation"] <- meth2
meth2 <- t778.matrix[,"coverage"]/100
cov2 <- t778.matrix[,"methylation"]
t778.matrix[,"coverage"] <- cov2
t778.matrix[,"methylation"] <- meth2
lps141.matrix.indices <- which(apply(lps141.matrix, 1, function(x){!any(is.na(x))}))
lps853.matrix.indices <- which(apply(lps853.matrix, 1, function(x){!any(is.na(x))}))
t449.matrix.indices <- which(apply(t449.matrix, 1, function(x){!any(is.na(x))}))
t778.matrix.indices <- which(apply(t778.matrix, 1, function(x){!any(is.na(x))}))
# Confirm rows match
all(rownames(lps141.matrix.indices) == rownames(lps853.matrix.indices))
all(rownames(lps141.matrix.indices) == rownames(t449.matrix.indices))
all(rownames(lps141.matrix.indices) == rownames(t778.matrix.indices))
common.indices <- Reduce(intersect, list(lps141.matrix.indices, lps853.matrix.indices, t449.matrix.indices, t778.matrix.indices))
length(common.indices)
# Use all rows
common.indices <- 1:nrow(lps141.matrix)
lps141.common <- lps141.matrix[common.indices,]
lps853.common <- lps853.matrix[common.indices,]
t449.common <- t449.matrix[common.indices,]
t778.common <- t778.matrix[common.indices,]
```
```{r}
dat <- list(lps141.common, lps853.common, t449.common, t778.common)
names(dat) <- c("LPS141", "LPS853", "T449", "T778")
cell_lines <- names(dat)
df <- data.frame(dat)
```
DNAm
```{r}
df %>% ggplot(aes(LPS141.methylation)) + geom_histogram() + theme_bw() + ggtitle("LPS141 DNAm") -> p1
df %>% ggplot(aes(LPS853.methylation)) + geom_histogram() + theme_bw()+ ggtitle("LPS853 DNAm") -> p2
df %>% ggplot(aes(T449.methylation)) + geom_histogram() + theme_bw()+ ggtitle("T449 DNAm") -> p3
df %>% ggplot(aes(T778.methylation)) + geom_histogram() + theme_bw()+ ggtitle("T778 DNAm") ->p4
ggarrange(p1, p2, p3, p4, nrow = 1)
ggsave("plots/dnam_histograms.pdf", width=12, height=3)
```
```{r}
#df %>% ggplot(aes(paste0(i, ".coverage"), paste0(i, ".coverage")))
df %>% ggplot(aes(LPS141.coverage, LPS853.coverage)) + geom_point(aes(color=LPS853.methylation-LPS141.methylation), size=3) + scale_color_gradient2(mid = "grey99") + theme_bw() + xlim(0, 50) + ylim(0,50)
```
```{r}
df %>% mutate(LPS853_vs_LPS141_CN = log2((1+LPS853.coverage)/(1+LPS141.coverage))) %>% mutate(LPS853_vs_LPS141_CN_clipped = pmin(pmax(LPS853_vs_LPS141_CN, -2), 2)) %>% ggplot(aes(LPS141.methylation, LPS853.methylation)) + geom_point(aes(color=LPS853_vs_LPS141_CN_clipped)) + scale_color_gradient2() + theme_bw()
```
```{r}
df %>% mutate(LPS853_vs_T449_CN = log2((1+LPS853.coverage)/(1+T449.coverage))) %>% mutate(LPS853_vs_T449_CN_clipped = pmin(pmax(LPS853_vs_T449_CN, -2), 2)) %>% ggplot(aes(T449.methylation, LPS853.methylation)) + geom_point(aes(color=LPS853_vs_T449_CN_clipped)) + scale_color_gradient2() + theme_bw()
```
```{r}
df %>% mutate(LPS853_vs_T778_CN = log2((1+LPS853.coverage)/(1+T778.coverage))) %>% mutate(LPS853_vs_T778_CN_clipped = pmin(pmax(LPS853_vs_T778_CN, -2), 2)) %>% ggplot(aes(T778.methylation, LPS853.methylation)) + geom_point(aes(color=LPS853_vs_T778_CN_clipped)) + scale_color_gradient2() + theme_bw()
```
```{r}
df %>% mutate(LPS141_vs_LPS853_CN = log2((1+LPS141.coverage)/(1+LPS853.coverage))) %>% mutate(LPS141_vs_LPS853_CN_bin = cut(LPS141_vs_LPS853_CN, c(-Inf, -1, 1, Inf), labels=c("LPS853\nCN Amplification", "LPS141 CN = LPS853 CN", "LPS141\nCN Amplification"))) %>% filter(!is.na(LPS141_vs_LPS853_CN_bin)) %>% ggplot(aes(LPS141_vs_LPS853_CN_bin, LPS141.methylation - LPS853.methylation)) + geom_jitter(alpha=0.3) + geom_hline(yintercept = 0) + theme_bw() + xlab("") + annotate("label", x=3.3, y=-0.85, label="Lower DNAm\nin LPS141", size=2, col="blue") + annotate("label", x=3.3, y=0.85, label="Lower DNAm\nin LPS853", size=2, col="blue")
ggsave("plots/lps141_lps853_cn_meth.pdf", width=5, height=4)
```
```{r}
df %>% mutate(LPS141_vs_T449_CN = log2((1+LPS141.coverage)/(1+T449.coverage))) %>% mutate(LPS141_vs_T449_CN_bin = cut(LPS141_vs_T449_CN, c(-Inf, -1, 1, Inf), labels=c("T449\nCN Amplification", "LPS141 CN = T449 CN", "LPS141\nCN Amplification"))) %>% filter(!is.na(LPS141_vs_T449_CN_bin)) %>% ggplot(aes(LPS141_vs_T449_CN_bin, LPS141.methylation - T449.methylation)) + geom_jitter(alpha=0.3) + geom_hline(yintercept = 0) + theme_bw() + xlab("") + annotate("label", x=0.7, y=-0.85, label="Lower DNAm\nin LPS141", size=2, col="blue") + annotate("label", x=0.7, y=0.85, label="Lower DNAm\nin T449", size=2, col="blue")
ggsave("plots/lps141_t449_cn_meth.pdf", width=5, height=4)
```
```{r}
df %>% mutate(T449_vs_LPS853_CN = log2((1+T449.coverage)/(1+LPS853.coverage))) %>% mutate(T449_vs_LPS853_CN_bin = cut(T449_vs_LPS853_CN, c(-Inf, -1, 1, Inf), labels=c("LPS853\nCN Amplification", "T449 CN = LPS853 CN", "T449\nCN Amplification"))) %>% filter(!is.na(T449_vs_LPS853_CN_bin)) %>% ggplot(aes(T449_vs_LPS853_CN_bin, T449.methylation - LPS853.methylation)) + geom_jitter(alpha=0.3) + geom_hline(yintercept = 0) + theme_bw() + xlab("") + annotate("label", x=3.3, y=-0.85, label="Lower DNAm\nin T449", size=2, col="blue") + annotate("label", x=3.3, y=0.85, label="Lower DNAm\nin LPS853", size=2, col="blue")
ggsave("plots/t449_lps853_cn_meth.pdf", width=5, height=4)
```
```{r}
df %>% mutate(T778_vs_LPS853_CN = log2((1+T778.coverage)/(1+LPS853.coverage))) %>% mutate(T778_vs_LPS853_CN_bin = cut(T778_vs_LPS853_CN, c(-Inf, -1, 1, Inf), labels=c("LPS853\nCN Amplification", "T778 CN = LPS853 CN", "T778\nCN Amplification"))) %>% filter(!is.na(T778_vs_LPS853_CN_bin)) %>% ggplot(aes(T778_vs_LPS853_CN_bin, T778.methylation - LPS853.methylation)) + geom_jitter(alpha=0.3) + geom_hline(yintercept = 0) + theme_bw() + xlab("") + annotate("label", x=3.3, y=-0.85, label="Lower DNAm\nin T778", size=2, col="blue") + annotate("label", x=3.3, y=0.85, label="Lower DNAm\nin LPS853", size=2, col="blue")
ggsave("plots/t778_lps853_cn_meth.pdf", width=5, height=4)
```
```{r}
df %>% mutate(T778_vs_T449_CN = log2((1+T778.coverage)/(1+T449.coverage))) %>% mutate(T778_vs_T449_CN_bin = cut(T778_vs_T449_CN, c(-Inf, -1, 1, Inf), labels=c("T449\nCN Amplification", "T778 CN = T449 CN", "T778\nCN Amplification"))) %>% filter(!is.na(T778_vs_T449_CN_bin)) %>% ggplot(aes(T778_vs_T449_CN_bin, T778.methylation - T449.methylation)) + geom_jitter(alpha=0.3) + geom_hline(yintercept = 0) + theme_bw() + xlab("") + annotate("label", x=3.3, y=-0.85, label="Lower DNAm\nin T778", size=2, col="blue") + annotate("label", x=3.3, y=0.85, label="Lower DNAm\nin T449", size=2, col="blue")
ggsave("plots/t778_t449_cn_meth.pdf", width=5, height=4)
```
```{r}
df %>% mutate(LPS141_vs_LPS853_CN = log2((1+LPS141.coverage)/(1+LPS853.coverage))) %>%
mutate(LPS141_vs_LPS853_CN_bin = cut(LPS141_vs_LPS853_CN, c(-Inf, -1, 1, Inf), labels=c("LPS853 CN Amplification", "LPS141 CN = LPS853 CN", "LPS141 CN Amplification"))) %>% filter(!is.na(LPS141_vs_LPS853_CN_bin)) %>% ggplot(aes(LPS853.methylation, LPS141.methylation)) + geom_point(alpha=0.2) + geom_hline(yintercept = 0) + facet_wrap(~LPS141_vs_LPS853_CN_bin) + theme_bw() + annotate("label", x=0.8, y=0.2, label="Lower DNAm\nin LPS141", size=3, col="blue") + annotate("label", x=0.2, y=0.8, label="Lower DNAm\nin LPS853", size=3, col="blue") + geom_abline(intercept = 0, slope=1, col="blue", linetype=2)
ggsave("plots/lps141_lps853_cn_meth_scatter.pdf", width=11, height=4)
```
```{r}
df %>% mutate(LPS141_vs_T449_CN = log2((1+LPS141.coverage)/(1+T449.coverage))) %>%
mutate(LPS141_vs_T449_CN_bin = cut(LPS141_vs_T449_CN, c(-Inf, -1, 1, Inf), labels=c("T449 CN Amplification", "LPS141 CN = T449 CN", "LPS141 CN Amplification"))) %>% filter(!is.na(LPS141_vs_T449_CN_bin)) %>% ggplot(aes(T449.methylation, LPS141.methylation)) + geom_point(alpha=0.2) + geom_hline(yintercept = 0) + facet_wrap(~LPS141_vs_T449_CN_bin) + theme_bw() + annotate("label", x=0.75, y=0.25, label="Lower DNAm\nin LPS141", size=2, col="blue") + annotate("label", x=0.25, y=0.75, label="Lower DNAm\nin T449", size=2, col="blue") + geom_abline(intercept = 0, slope=1)
ggsave("plots/lps141_t449_cn_meth_scatter.pdf", width=10, height=4)
```