-
Notifications
You must be signed in to change notification settings - Fork 0
/
export_dream_median_phospho.R
130 lines (79 loc) · 4.22 KB
/
export_dream_median_phospho.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# 3 parts:
# 1. export data for public use
# - remove the conditions used for the scoring
# 2. export prediction conditions:
# - export the conditions and reporters masked with NAs for aim2
# 3. export the validation data.
# Load the interpolated median measurements and reshape them to wide format:
# one column per reporter, filled from the `value` column.
median_interpolated_data <- read_rds(
  "./data/median_data/interpolated_median_allsamples_correct_times.rds"
)
median_data <- spread(median_interpolated_data, key = reporter, value = value)

# Cell-line-to-aim assignment table (first sheet, cells A1:I69).
cell_line_sheet <- readxl::read_excel(
  path = "./data/cell_line_distribution.xlsx",
  sheet = 1,
  range = "A1:I69"
)
# 1. export data for public use --------------------------------------------------
# export median phosphorylation data after removing the conditions that are to be predicted
# Export data for the public: all data except the test conditions
# we remove imTOR from all cell-lines.
# then depending which cell-line is used in which AIM, we remove a subset of the data.
# Build the public (training) data set and write it to CSV.
#
# The imTOR treatment is withheld from every cell line. What else is hidden
# depends on the first aim column for which the cell line is flagged "test"
# in `cell_line_sheet` (checked in this order):
#   AIM_1_1   -> five phospho-site columns are set to NA in all rows
#   AIM_1_2_1 -> all rows of one inhibitor treatment are removed
#   AIM_1_2_2 -> nothing further (imTOR is already removed)
#   AIM2      -> only the "full" treatment rows are kept
# Cell lines flagged for none of these aims are exported unchanged.
public_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  group_by(cell_line) %>%
  nest() %>% # list-column is named "data" by default; `.key` is deprecated
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(cell_data, cell_name) {
    purpose <- cell_line_sheet %>% filter(cell_line == !!cell_name)

    # TRUE only when the (first) sheet entry is exactly "test". Empty (NA)
    # cells and cell lines absent from the sheet count as "not test", so no
    # type-changing NA -> "" assignment on the sheet row is needed.
    is_test <- function(flag) isTRUE(flag[1] == "test")

    # Treatment withheld per cell line for AIM_1_2_1.
    withheld_treatment <- c(
      "MDAMB468" = "iEGFR", "MCF12A" = "iEGFR", "BT483" = "iEGFR",
      "184B5" = "iMEK", "ZR751" = "iMEK", "HCC202" = "iMEK",
      "UACC3199" = "iPI3K", "SKBR3" = "iPI3K", "MDAMB231" = "iPI3K",
      "HCC1806" = "iPKC", "Hs578T" = "iPKC", "HCC1428" = "iPKC"
    )

    if (is_test(purpose$AIM_1_1)) {
      # remove psites to be predicted
      hidden_sites <- c("p.ERK", "p.Akt.Ser473.", "p.S6", "p.HER2", "p.PLCg2")
      cell_data[, hidden_sites] <- NA_real_
    } else if (is_test(purpose$AIM_1_2_1)) {
      # remove all data in the condition to be predicted; cell lines without
      # an entry in the lookup keep all their rows
      withheld <- unname(withheld_treatment[as.character(cell_name)])
      if (!is.na(withheld)) {
        cell_data <- cell_data %>% filter(treatment != !!withheld)
      }
    } else if (is_test(purpose$AIM_1_2_2)) {
      # nothing to do here, imTOR condition already removed.
    } else if (is_test(purpose$AIM2)) {
      # providing only the full condition for the test
      cell_data <- cell_data %>% filter(treatment == "full")
    }
    cell_data
  })) %>%
  # tidyr >= 1.0 unnest() keeps other list-columns; drop the raw nested data
  # so the result is a flat table that can be written as CSV.
  select(-data) %>%
  unnest(cleaned_data)
# File argument passed positionally: it was renamed from `path` to `file`
# across readr versions.
write_csv(
  public_data,
  "./challenge_data/median_phospho/median_phospho_data.csv"
)
### 2. export prediction conditions --------------------------------------------
# export the conditions and use NA for the values that the participants have to predict
# Build the AIM 2 prediction template and write it to CSV.
#
# For every cell line flagged "test" in the AIM2 column of `cell_line_sheet`,
# keep all rows except the "full" treatment and set every reporter value to NA
# (these are the values to be predicted). Cell lines not flagged for AIM2
# contribute no rows.
prediction_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  select(-p.HER2, -p.PLCg2) %>% # no need to predict these
  group_by(cell_line) %>%
  nest() %>% # list-column is named "data" by default; `.key` is deprecated
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(cell_data, cell_name) {
    purpose <- cell_line_sheet %>% filter(cell_line == !!cell_name)

    # Empty (NA) AIM2 cells and cell lines absent from the sheet are treated
    # as "not test" and yield an empty table, which unnest() drops.
    if (!isTRUE(purpose$AIM2[1] == "test")) {
      return(tibble())
    }

    # Everything after the first two columns is treated as a reporter.
    # NOTE(review): assumes the two leading columns of the nested data are
    # condition descriptors (e.g. treatment/time) -- confirm against
    # `median_data`.
    reporters <- colnames(cell_data)[-(1:2)]

    # keep the conditions to be predicted (everything except "full") and
    # blank out the reporter values
    cell_data %>%
      filter(treatment != "full") %>%
      mutate_at(reporters, ~ NA_real_)
  })) %>%
  # tidyr >= 1.0 unnest() keeps other list-columns; drop the raw nested data
  # so the result is a flat table that can be written as CSV.
  select(-data) %>%
  unnest(cleaned_data)
# File argument passed positionally: it was renamed from `path` to `file`
# across readr versions.
write_csv(
  prediction_data,
  "./challenge_data/predict_conditions/AIM_2_template_data.csv"
)
### # 3. export the validation data. --------------------------------------------
# export the conditions for validation
# Build the AIM 2 validation data and write it to CSV.
#
# For every cell line flagged "test" in the AIM2 column of `cell_line_sheet`,
# keep the measured values of all rows except the "full" treatment. Cell lines
# not flagged for AIM2 contribute no rows.
validation_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  select(-p.HER2, -p.PLCg2) %>% # no need to predict these
  group_by(cell_line) %>%
  nest() %>% # list-column is named "data" by default; `.key` is deprecated
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(cell_data, cell_name) {
    purpose <- cell_line_sheet %>% filter(cell_line == !!cell_name)

    # Empty (NA) AIM2 cells and cell lines absent from the sheet are treated
    # as "not test" and yield an empty table, which unnest() drops.
    if (!isTRUE(purpose$AIM2[1] == "test")) {
      return(tibble())
    }

    # keep the held-out conditions (everything except "full") with their
    # measured values
    cell_data %>% filter(treatment != "full")
  })) %>%
  # tidyr >= 1.0 unnest() keeps other list-columns; drop the raw nested data
  # so the result is a flat table that can be written as CSV.
  select(-data) %>%
  unnest(cleaned_data)
# File argument passed positionally: it was renamed from `path` to `file`
# across readr versions.
write_csv(
  validation_data,
  "./challenge_data/validation_data/AIM_2_median_data.csv"
)