-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdorothea.R
80 lines (56 loc) · 2.4 KB
/
dorothea.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
### DoRothEA to GMT
# Takes the latest DoRothEA database of transcription factor (TF)-target interactions and reshapes it into
# a .gmt file suitable for the Broad/UCSD GSEA program. GSEA is used here as an alternative to the
# authors' preferred program, VIPER.
# https://saezlab.github.io/dorothea/
###
### 0. Load Libraries
###
# install from bioconductor
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("dorothea")
# install the development version from GitHub if you want
# install.packages("devtools")
# devtools::install_github("saezlab/dorothea")
library(dorothea)
library(dplyr)
###
### 1. Access (mouse) Regulons
###
# Load the mouse regulon table from the dorothea package.
# For human data, load dorothea_hs instead:
# data(dorothea_hs, package = "dorothea")
data(dorothea_mm, package = "dorothea")

# Keep only interactions at the chosen confidence levels.
# Levels range from A (highest confidence) to E (lowest confidence):
#   A:   supported by all four lines of evidence (literature, ChIP-seq peaks,
#        inference from gene expression, and TF binding motifs on promoters);
#        considered highly reliable
#   B-D: curated and/or ChIP-seq interactions with different levels of
#        additional evidence
#   E:   supported only by computational predictions
# Adjust the data set (mouse or human) and the confidence levels as needed,
# e.g. for human:
# regulons <- filter(dorothea_hs, confidence %in% c("A", "B", "C", "D"))
regulons <- filter(dorothea_mm, confidence %in% c("A", "B", "C", "D"))
###
### 2. Convert format
###
# https://stackoverflow.com/questions/70071847/rearranging-data-from-tibble-into-a-dataframe-in-r/70071923?noredirect=1#comment123868985_70071923
# Collapse the long TF-target table to one row per TF, gathering each TF's
# targets into a single list-column, and return a plain data frame.
regulons <- regulons %>%
  group_by(tf) %>%
  summarise(Targets = list(target)) %>%
  as.data.frame()

# Split the list-column into one column per target
# https://stackoverflow.com/questions/50881440/split-a-list-column-into-multiple-columns
library(tidyr)
# The target vectors are unnamed, so names_sep must be supplied: current tidyr
# raises an error for unnamed values without it. The generated column names
# (Targets_1, Targets_2, ...) are not written to the output file, which is
# exported with col.names = FALSE.
regulons <- regulons %>% unnest_wider(Targets, names_sep = "_")
# Add the column that holds the gene-set description in a GMT file. The row
# number is used as a placeholder here; it might be replaced with a real
# description later.
# tibble is attached before tibble()/add_column() are called, rather than
# relying on another package re-exporting tibble().
library(tibble)
number_of_rows <- nrow(regulons)
number_of_rows
# seq_len() gives an empty sequence for a zero-row table, whereas
# 1:number_of_rows would produce c(1, 0).
row_num <- tibble(x = seq_len(number_of_rows))
# Insert the placeholder column directly after the TF name column.
regulons <- regulons %>% add_column(row_num, .after = "tf")
###
### 3. Convert regulon table to GMT
###
# GMT format reference:
# https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats
# Write one tab-separated line per TF with no header, no row names and no
# quoting; NA cells are written as empty strings.
write.table(
  x = regulons,
  file = "dorothea.gmt.txt",
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = FALSE,
  col.names = FALSE
)