forked from Ulas-lab/Shiny-Seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
module2.R
483 lines (408 loc) · 20.3 KB
/
module2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
source("functions.R")
source("gene_count_module.R")
#source("E:/Zenitha/App/module1.r")
Module_Normalized_data_UI<-function(id)
{
ns<-NS(id)
tagList(
fluidRow(column(10,bsAlert("message"))),
fluidRow(column(5,actionButton(ns("help"), "Help"))),
fluidRow(column(8,
bsAlert("alert"),
plotlyOutput(ns("cut_off_plot"))
),
column(4,helpText("Please enter before pressing Start"),
textInput(ns("cutoff"), "Cut-off value", value = 10, width = NULL, placeholder = NULL))),
fluidRow(
column(1,
selectInput(ns("datachoice2") ,label = h5("Select Data Type"),
choices = list("Excel" = 1, "CSV" = 2),
selected = 1)),
column(1,
br(),
br(),
downloadButton(ns("downloadnorm"), 'Download normalized Data Table'))),
fluidRow(
column(12,
# normalized data table
#downloadButton(ns('download'), 'Download full Data'),
DT::dataTableOutput(ns("normal")),
gene_count_module_UI(ns("module"))
# uiOutput("plot_option_gene_n"),
# downloadButton('download_norm_genecount', 'Download plot'),
# plotOutput("gene_plot_n"))
)),
bsModal(ns("modalhelp"), "Help for error model matrix not full rank", ns("help"),size = "large",
helpText("There are two main reasons for this problem: either one or more
columns in the model matrix are linear combinations of other columns, or there
are levels of factors or combinations of levels of multiple factors which are missing
samples. We address these two problems below"),
helpText("Case 1:"),
fluidRow(column(5,img(src="error1.png"))),
helpText(" In the above table two variables contain exactly the same information. The software cannot
fit an effect for batch and condition, because
they produce identical columns in the model matrix. This is also referred to as
perfect confounding."),
p("Solution:"),
helpText("the batch effect cannot be fit and must be removed
from the model formula. There is just no way to tell apart the condition effects
and the batch effects. The options are either to assume there is no batch effect
(which we know is highly unlikely given the literature on batch effects in sequencing
datasets) or to repeat the experiment and properly balance the conditions across
batches."),
helpText("Case 2:"),
fluidRow(column(5,img(src="error2.png"))),
helpText("In the above table the variables are not identical,
but one variable can be formed by the combination of other factor levels. In the
following example, the effect of batch 2 vs 1 cannot be fit because it is identical
to a column in the model matrix which represents the condition C vs A effect."),
p("Solution:"),
helpText("the batch effect cannot be fit and must be removed
from the model formula. There is just no way to tell apart the condition effects
and the batch effects. The options are either to assume there is no batch effect
(which we know is highly unlikely given the literature on batch effects in sequencing
datasets) or to repeat the experiment and properly balance the conditions across
batches."),
helpText("Case 3:"),
fluidRow(column(5,img(src="error3.png"))),
helpText("In the above case where we can in fact perform inference.In the above example the experiment
comprises of grouped individuals, where the goal is to test the group-specific effect of
a treatment, while controlling for individual effects."),
p("Solution:"),
helpText("unfortunately the tool cannot be currently used for a similar scenario
as above.We are working on it.You will have to use DESeq2 to define the design."),
helpText("Case 4:"),
fluidRow(column(5,img(src="error4.png"))),
helpText("In the above case as highlighted a level is missing from a factor.The group 3 does not have level C"),
p("Solution:"),
helpText("Remove these samples with missing level.In the above case remove samples 13 to 16")
)#,
)
}
#normalized tab
#create a gene plots module
Module_Normalized_data<-function(input,output,session,vdata,conchoice,designchoice,v_click1)
{
data<-vdata()
print(ncol(data[[1]]))
trim <- reactive({
#req(input$conchoice)
tryCatch(
#check if user has provided the choice of treatment/condition variable
# if (!is.null(input$conchoice))#((input$ok1 > 0 )&& (!is.null(input$conchoice)))#
{
print("inside module 2 line 86")
print("treatment/condition variable provided")
#Get variable to consider as treatment/condition
condition<-as.numeric(conchoice)
#print(condition)
#===============Main purpose of this reactive==========================#
#Step 1: prepare for DESeq2
#Inputs needed: expression/count table, annotation table and design formula
#Step 2: prepare expression/count table for trimming (Quality control used to remove genes with low counts)
#=======================================================================#
#Input 1: Get expression data
edata <-data[[4]]
print(head(edata))
#Input 2: Get annotation data
pheno<-data[[2]]
#print(pheno)
#Input 3: Get design formula
design<-as.numeric(designchoice)
#print(design)
#Rename treatment/condition variable to condition
#This is done to ensure one standard name to refer to , in the future computation steps
colnames(pheno)[condition]<-"condition"
#Since the design formula contains the name of the treatment/condition variable(based on input annotation table)
#We rename the variable to "condirion" in the design formula
#Updating design formula
d<-''
for (i in design)
{
#print(i)
d<-paste(d,colnames(pheno)[i]," + ",sep=" ")
}
#print(d)
d<-paste('~',' ',d,colnames(pheno)[condition],sep = " ")
#print new formula
#print(formula(d))
#once we have obtained the input we store it in a container (a subclass)
#DESeqDataSet is a subclass of RangedSummarizedExperiment, used to store the input values,
#intermediate calculations and results of an analysis of differential expression.
#check dese2 vignette for more information.
#Prepare DESeq dataset using inputs
# filechoice <- callModule(Module_Raw_data_Input, "moduleA")
# print(class(edata))
# if(as.numeric(filechoice()) == 2)
# dds<-DESeqDataSetFromMatrix(edata,colData=pheno,formula(d))
#
# else if(as.numeric(filechoice()) == 1)
if(identical(data[[4]], data[[1]]))
dds <- DESeqDataSetFromMatrix(edata,colData=pheno,formula(d))
else
dds <- DESeqDataSetFromTximport(edata,colData=pheno,formula(d))
#A check to see if the annotation table has been correctly stored and if the value can be retrieved
print(head(counts(dds)))
print("yes")
genes_to_keep <- rowSums(counts(dds)) >= as.numeric(input$cutoff)
print("test1")
dds <- dds[genes_to_keep,]
dds <- DESeq(dds)
dds.norm <- as.data.frame(counts(dds, normalized=T))
if(identical(data[[4]], data[[1]]))
dt1<-data.frame(rowSum = rowSums(edata))
else
dt1<-data.frame(rowSum = rowSums(edata$counts))
print("test3")
#Normalization: DESeq2 performs normalization by first computing size factor for each sample
#call function min_samples_three to get a list of treatment/condition groups that contain only one or two samples
samp<-min_samples_three(dds.norm)
#prior to proceeding with normalization, if treatment/condition groups have one or two samples
#then user is alerted through a warning message (error message if a treatment/condition group has only one sample)
if(is.null(samp)) {
#print('k')
closeAlert(session,"exampleAlert")
}
else if(samp[[2]]==1) { #get treatment/condition groups with only one sample
if(length(samp[[1]])>1) #if there are more than one treatment/condition group with only one sample
{
x<-strsplit(samp[[1]],' ') #Get names of all treatment/condition groups with only one sample
# print('k')
# print(x)
# print(length(x))
if(!is.null(x)) closeAlert(session,"exampleAlert")
sam<-NULL
for (i in 1:(length(x)-1))
{
sam<-paste0(sam,x[[i]],',')
}
sam<-paste0(substr(sam,1,nchar(sam)-1),' and ',x[[length(x)]])
#print(sam)
#Alert user with the names of all treatment/condition groups with only one sample
createAlert(session,"alert", "exampleAlert", title = "Oops!",
content = paste0("The conditions ",sam," have only 1 sample.Either add additional samples for these conditions or remove the condition."), append = T)
}
else{
#Alert user with the name of treatment/condition group with only one sample
createAlert(session,"alert", "exampleAlert", title = "Oops!",
content = paste0("The condition ",samp[[1]]," has only 1 sample.Either add additional samples for this condition or remove the condition."), append = T)
}
}
else if(samp[[2]]==2) #get treatment/condition groups with only two samples
{
if(length(samp[[1]])>1)#if there are more than one treatment/condition groups with only two samples
{
x<-strsplit(samp[[1]],' ')#Get names of all treatment/condition groups with only two samples
# print('k')
# print(x)
# print(length(x))
print("module 2 line 192")
if(!is.null(v_click1())) print(x)
if(!is.null(x)) closeAlert(session,"exampleAlert")
sam<-NULL
for (i in 1:(length(x)-1))
{
sam<-paste0(sam,x[[i]],',')
}
print(sam)
sam<-paste0(substr(sam,1,nchar(sam)-1),' and ',x[[length(x)]])
print(sam)
#Alert users with names of all treatment/condition groups with only two samples
createAlert(session,"alert", "exampleAlert", title = "Warning!",
content = paste0("The conditions ",sam," have only 2 samples"), append = T)
}
else
{
#Alert user with name of treatment/condition group with only two samples
createAlert(session,"alert", "exampleAlert", title = "Warning!",
content = paste0("The condition ",samp[[1]]," has 2 samples"), append = T)
}
}
list(dds, dds.norm, dt1)
}
#It is possible that two variable in the annotation table can coontain the same information
#meaning the variables explain the same variance in the dataset in which case DESEq2 throws an error
#"the model matrix is not full rank". In such a case the user may have to remove one of these
#variables from the annotation table or combine it as one. For more information rgearding
#the problem and what to do please check for the error in DESeq2 vignette.
, error = function(c) {
print(typeof(c$message))
print(substr(c$message,1,33))
if("the model matrix is not full rank" == substr(c$message,1,33)) c$message <- paste0("Error:The model matrix is not full rank. To know more about this please click the help button on this page.")
else c$message
stop(c)
})
})
#Prior to trimming( remove genes with low counts), we need to get the threshold from the user
#The user decides the the threshold based on the cut off plot.
# The cut off plot is an interactive plot which indicates the number of genes remaining in the dataset after
# trimming. The user can hover over the plot to see the threshold value and the corresponding genes that would remain in the
# dataset if the value is chosen
# This reactive computes the input for the cut off plot
cutoff<-reactive({
#if(!is.null(input$conchoice)){
#call reactive trim
# get dataframe conintaing the (average expression of a gene
# in each treatment/condition group + maximum of this average value)
matrix<-trim()[[3]]
print("Inside module 2 line 269 cutof reactive")
dat<-as.data.frame(matrix(NA, nrow = 200, ncol = 2))
colnames(dat)<-c("x","y")
#Loop to compute the number of genes that would remain in the dataset
# for thresholds one to 200.
for(i in 0:199)
{
#print(i)
dat[i+1,1]<-i #Threshold value (value for x-axis)
dat[i+1,2]<-length(which(matrix$rowSum>=i)) #Get number of remaining genes after trimming (value for y-axis)
}
#print(head(x))
#print(head(y))
#dat<-as.data.frame(x,y)
#print(head(dat))
dat
#}
})
# Display cut off plot
output$cut_off_plot <-renderPlotly({
#req(conchoice)
# if(!is.null(conchoice))
# {
dat<-cutoff()
#print(head(dat))
# Plotly plot
plot_ly(x=dat$x,y=dat$y,type = "scattergl",mode = 'markers',
hoverinfo = 'text',
text = ~paste('Cut-off:', dat$x,
'</br> Total gene counts:', dat$y)) %>%
#add_trace(x=dat$x[1],y =dat$y[1], mode = "lines")%>%
#add_markers(type = "scattergl")%>%
layout(
xaxis = list(title = "Cut-off", gridcolor = "#bfbfbf", domain = c(0, 0.98)),
yaxis = list(title = "Total gene counts", gridcolor = "#bfbfbf"))
# }
# else plotly_empty()
})
#Trim expression/count data using threshold if provided by user and update DESeq2 dataset
dds.fc<-reactive({
progress <- shiny::Progress$new()
# Make sure it closes when we exit this reactive, even if there's an error
on.exit(progress$close())
progress$set(message = "Preparing normalized table", value = 0)
# Number of times we'll go through the loop to update progress bar
n <- 3
#Step 1: Retieve DESeq2 dataset object
dds<-trim()[[1]]#isolate(trim()[[1]])
#if(!is.null(v_click1)) dds<-trim()[[1]]#incase samples have been selected to be deleted
print('module 2 line 325 dds')
#print(dds)
#In order to visualize PCA we need to transform count data. Hence we perform rlogtransformation.
#define variable to contain transformed data
rld<-NULL
# Increment the progress bar, and update the detail text.
progress$inc(1/n, detail = paste("Doing part", 1,"/",n))
# Pause for 0.1 seconds to simulate a long computation.
Sys.sleep(0.1)
#Step 2: check if user has provided threshold to trim data, if not then skip trimming and proceed to normalization
#print("cutoff")
#print(input$cutoff)
dds.fc<-dds
# Increment the progress bar, and update the detail text.
progress$inc(1/n, detail = paste("Doing part", 2,"/",n))
# Pause for 0.1 seconds to simulate a long computation.
Sys.sleep(0.1)
rld<-rlogTransformation(dds.fc)
# Increment the progress bar, and update the detail text.
progress$inc(1/n, detail = paste("Doing part", 3,"/",n))
# Pause for 0.1 seconds to simulate a long computation.
Sys.sleep(0.1)
list(dds.fc,rld)
})
#Once dataset has been trimmed by removing genes with low counts we proceed to the next step of pre-processing
#Perform normalization
#Get normalized data
normal <-reactive({
print("inside module 2 line 405 normal")
# if(!is.null(input$conchoice))
# {
dds <- dds.fc()[[1]]
#Estimate size factors
norm <- counts(dds, normalized=T)
#Get normalized data
#print("normalized table")
#print(head(norm))
if(!is.null(norm))
{
createAlert(session,"message","exampleAlert1", title="Message: To proceed to next step",
content = "Click on batch effect check button.", append=FALSE)
}
else closeAlert(session,"exampleAlert1")
norm
# }
})
# Display normalized table
output$normal <-
DT::renderDataTable({
#print("inside normalized table")
#print(input$conchoice)
#req(input$conchoice)
# if(!is.null(input$conchoice))# & input$cutoff !="")
# {
DT::datatable(normal(),class = 'cell-border stripe',
selection = list(mode='single',target = 'row'),
extensions = list('Scroller'=NULL,'Buttons'=NULL),
options = list(deferRender = TRUE,scrollX = TRUE,scrollY = 250,scroller = TRUE,dom = 'Bfrtip',
buttons = list('copy')#I('colvis')
)
)
#}
})
#display gene expression across conditon
observeEvent(input$normal_rows_selected,{
#get which gene was clicked
#get the row number of the gene clicked
print('hey')
print(input$normal_rows_selected)
selected_row <- input$normal_rows_selected
print(selected_row)
#get the normalized data
full_data<-normal()
print(head(full_data))
print('hey')
#get the gene
an_gene<-rownames(full_data)[selected_row]
print(an_gene)
counts<-as.vector(full_data[selected_row,])#[which(an_gene %in% rownames(full_data)),])
print(counts)
print(as.factor(colData(dds.fc()[[1]])[,as.numeric(conchoice)]))
cond<-as.vector(colData(dds.fc()[[1]])[,as.numeric(conchoice)])
#rownames(cond)<-colData(dds.fc()[[1]])[,1]
library('data.table')
print(length(counts))
print(length(cond))
df<-data.frame(counts,cond)
colnames(df)<-c('count','condition')
print(head(df))
callModule(gene_count_module,"module",NULL,reactive({dds.fc()[[1]]}),reactive({an_gene}))
})
#download normalized data
output$downloadnorm <- downloadHandler(
filename = function() {
if(as.numeric(input$datachoice2==1)){paste("Normalized Data Table.xlsx")}
else if (as.numeric(input$datachoice2==2)){paste("Normalized Data Table.csv")}
},
content = function(file) {
if(as.numeric(input$datachoice2==1)){
wb <- createWorkbook()
addWorksheet(wb, sheetName = "Normalized table")
writeData(wb = wb, sheet = 1, x = normal(), colNames = T, rowNames = T)
saveWorkbook(wb, file)
# write.xlsx2(normal(), file, sheetName = "Normalized table",
# col.names = TRUE, row.names = TRUE, append = FALSE)
}
else if (as.numeric(input$datachoice2==2)){write.csv(normal(), file)}
}
)
dds<-list(dds.fc=reactive({dds.fc()}),normal=reactive({normal()}))
return(dds)
}