-
Notifications
You must be signed in to change notification settings - Fork 0
/
tmp.r
executable file
·67 lines (49 loc) · 1.6 KB
/
tmp.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# dependencies
library("dplyr")
library("stringr")
library("tidyr")
info <- list.files("data") %>%
str_replace(".rda", "") %>%
data.frame(info = .) %>%
separate(info, c("genome", "enzyme", "reso"), sep = "_") %>%
mutate(reso = as.integer(reso))
info_unique <- lapply(info, unique)
av_genomes <- info_unique$genome
av_enzymes <- info_unique$enzyme
av_resos <- info_unique$reso
enz <- "DpnII"
geno <- "hg38"
reso <- 20e3
get_genomic_features <- function(geno, enz, reso){
enz <- ifelse(enz == "MboI", "DpnII", enz)
av_genomes <- c("dm3", "hg38", "mm10")
av_enzymes <- c("BspHI", "DpnII", "HindIII", "HinfI", "MspI", "NcoI", "NlaIII")
av_resolutions <- c(1000000, 100000, 10000, 1000, 5000000, 500000, 50000, 5000)
stopifnot(geno %in% av_genomes)
stopifnot(enz %in% av_enzymes)
if(reso %in% av_resolutions){
out <- paste0(geno,
"_", enz, "_",
as.integer(reso)) %>%
get()
return(out)
}
aux <- (reso - av_resolutions)
aux <- av_resolutions[aux >= 0]
best_reso <- max(aux)
out <- paste0(geno,
"_", enz, "_",
as.integer(best_reso)) %>%
get() %>%
mutate(pos = floor(pos /reso) * reso,
pos = as.integer(pos),
bin = paste0(chr, ":", pos)) %>%
group_by(chr, pos, bin) %>%
summarize(res = mean(res, na.rm = T),
cg = mean(cg, na.rm = T),
map = mean(map, na.rm = T)) %>%
ungroup()
out
}
get_genomic_features("hg38", "DpnII", 150000) %>%
str