diff --git a/DESCRIPTION b/DESCRIPTION
index bde9f22..f15c1dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: BIGapp
Title: Breeding Insight Genomics Shiny Application
-Version: 0.6.1
+Version: 1.0.0
Authors@R:
c(
person(c("Alexander", "M."), "Sandercock",
@@ -20,10 +20,9 @@ Authors@R:
role = "aut"),
person("Breeding Insight Team",
role = "aut"))
-Description: This R shiny app provides a web-based user friendly way for our internal and
- external collaborators to analyze genomic data without needing to use command-line tools.
+Description: This R shiny app provides a web-based user friendly way for researchers to analyze genomic data without needing to use command-line tools.
Initial supported analyses will include the mature genomics/bioinformatics pipelines developed
- within Breeding Insight, with additional analyses continuing to be added.
+ within Breeding Insight, with additional analyses continuing to be added. Both diploid and polyploid species are supported.
License: Apache License (== 2.0)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
@@ -32,6 +31,7 @@ biocViews:
Imports:
vcfR (>= 1.15.0),
adegenet,
+ curl,
DT,
dplyr,
bs4Dash,
@@ -51,6 +51,7 @@ Imports:
GWASpoly,
AGHmatrix,
factoextra,
+ httr,
future,
shinycssloaders,
RColorBrewer,
diff --git a/NAMESPACE b/NAMESPACE
index 5dd0ab8..2f9add2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -50,6 +50,8 @@ importFrom(bs4Dash,updatebs4TabItems)
importFrom(bs4Dash,valueBox)
importFrom(bs4Dash,valueBoxOutput)
importFrom(config,get)
+importFrom(curl,curl_fetch_memory)
+importFrom(curl,new_handle)
importFrom(factoextra,get_eigenvalue)
importFrom(future,availableCores)
importFrom(golem,activate_js)
@@ -72,8 +74,12 @@ importFrom(graphics,strheight)
importFrom(graphics,strwidth)
importFrom(graphics,text)
importFrom(graphics,title)
+importFrom(httr,GET)
+importFrom(httr,content)
+importFrom(httr,status_code)
importFrom(matrixcalc,is.positive.definite)
importFrom(plotly,add_markers)
+importFrom(plotly,ggplotly)
importFrom(plotly,layout)
importFrom(plotly,plot_ly)
importFrom(plotly,plotlyOutput)
diff --git a/R/GS_functions.R b/R/GS_functions.R
index 03b5e4c..79a98da 100644
--- a/R/GS_functions.R
+++ b/R/GS_functions.R
@@ -311,9 +311,9 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits,
results[(((r-1)*5)+fold), (length(traits)+2)] <- fold
# Extract GEBVs
- GEBVs_fold[, trait_idx] <- traitpred$g[test] #Confirm it is accuract to calculate the GEBVs for testing group from the trained model
+ GEBVs_fold[, trait_idx] <- traitpred$g[test]
- # Calculate heritability (these are wrong)
+ # Calculate heritability (*confirm this calculation* - either way will not report to user)
Vu <- traitpred$Vg
Ve <- traitpred$Ve
heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve)
diff --git a/R/MyFun_BIC_Meng.R b/R/MyFun_BIC_Meng.R
index 084b51b..aae20da 100644
--- a/R/MyFun_BIC_Meng.R
+++ b/R/MyFun_BIC_Meng.R
@@ -43,9 +43,9 @@
#' function for BIC calculation
#'
-#' @param y describe documentation
-#' @param PC describe documentation
-#' @param K describe documentation
+#' @param y length N vector
+#' @param PC matrix of principal components with N rows and P columns
+#' @param K kinship matrix with N rows and N columns
#'
#' @import rrBLUP
#' @importFrom MASS ginv
diff --git a/R/app_server.R b/R/app_server.R
index c74809a..2052408 100644
--- a/R/app_server.R
+++ b/R/app_server.R
@@ -3,6 +3,8 @@
#' @param input,output,session Internal parameters for {shiny}.
#' DO NOT REMOVE.
#' @import shiny
+#' @importFrom httr GET content status_code
+#' @importFrom curl new_handle curl_fetch_memory
#' @noRd
app_server <- function(input, output, session) {
# Your application server logic
@@ -68,6 +70,91 @@ app_server <- function(input, output, session) {
)
))
})
+
+ #Check for updates from GitHub for BIGapp
+ get_latest_github_commit <- function(repo, owner) {
+ url <- paste0("https://api.github.com/repos/", owner, "/", repo, "/releases/latest")
+ response <- GET(url)
+ content <- content(response, "parsed")
+
+ if (status_code(response) == 200) {
+ tag_name <- content$tag_name
+ clean_tag_name <- sub("-.*", "", tag_name)
+ clean_tag_name <- sub("v", "", clean_tag_name)
+ return(clean_tag_name)
+ } else {
+ return(NULL)
+ }
+ }
+
+ is_internet_connected <- function() {
+ handle <- new_handle()
+ success <- tryCatch({
+ curl_fetch_memory("https://www.google.com", handle = handle)
+ TRUE
+ }, error = function(e) {
+ FALSE
+ })
+ return(success)
+ }
+
+ observeEvent(input$updates_info_button, {
+ # Check internet connectivity
+ if (!is_internet_connected()) {
+ # Display internet connectivity issues message
+ showModal(modalDialog(
+ title = "No Internet Connection",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close")
+ ),
+ "Please check your internet connection and try again."
+ ))
+ return()
+ }
+
+ package_name <- "BIGapp"
+ repo_name <- "BIGapp" # GitHub repo name
+ repo_owner <- "Breeding-Insight" # User or organization name
+
+ # Get the installed version
+ installed_version <- as.character(packageVersion(package_name))
+
+ # Get the latest version from GitHub (can be tag version or latest commit)
+ latest_commit <- get_latest_github_commit(repo_name, repo_owner)
+
+ # Compare versions and prepare message
+ if (latest_commit > installed_version) {
+ update_status <- "A new version is available. Please update your package."
+ # Prepare styled HTML text for the modal
+ message_html <- paste(
+ "Installed version:", installed_version, "
",
+ #"Latest version commit SHA:", latest_commit, "
",
+ "A new version is available on GitHub!
",
+ "Please update your package."
+ )
+ } else {
+ update_status <- "Your package is up-to-date!"
+ # Prepare non-styled text for no update needed
+ message_html <- paste(
+ "Installed version:", installed_version, "
",
+ #"Latest version commit SHA:", latest_commit, "
",
+ update_status
+ )
+ }
+
+ # Display message in a Shiny modal
+ showModal(modalDialog(
+ title = "BIGapp Updates",
+ size = "m",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close")
+ ),
+ # Use HTML to format the message and include styling
+ HTML(message_html)
+ ))
+ })
#Download Session Info
output$download_session_info <- downloadHandler(
diff --git a/R/app_ui.R b/R/app_ui.R
index 18a0d76..c9b058b 100644
--- a/R/app_ui.R
+++ b/R/app_ui.R
@@ -34,43 +34,53 @@ app_ui <- function(request) {
href = "#",
"Session Info",
onclick = "Shiny.setInputValue('session_info_button', Math.random())"
+ ),
+ tags$a(
+ class = "dropdown-item",
+ href = "#",
+ "Check for Updates",
+ onclick = "Shiny.setInputValue('updates_info_button', Math.random())"
)
)
)
),
help = NULL, #This is the default bs4Dash button to control the presence of tooltips and popovers, which can be added as a user help/info feature.
bs4DashSidebar(
- skin="light", status = "info",
+ skin="light",
+ status = "info",
+ fixed=TRUE,
+ #minified = F,
+ expandOnHover = TRUE,
sidebarMenu(id = "MainMenu",
flat = FALSE,
tags$li(class = "header", style = "color: grey; margin-top: 10px; margin-bottom: 10px; padding-left: 15px;", "Menu"),
- menuItem("Home", tabName = "welcome", icon = icon("house")),
+ menuItem("Home", tabName = "welcome", icon = icon("house"),startExpanded = FALSE),
tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Genotype Processing"),
- menuItem("DArT Report2VCF", tabName = "dosage2vcf", icon = icon("share-from-square")),
- menuItem("Updog Dosage Calling", tabName = "updog", icon = icon("list-ol")),
+ menuItem("Convert to VCF", tabName = "dosage2vcf", icon = icon("share-from-square")),
+ menuItem("Dosage Calling", tabName = "updog", icon = icon("list-ol")),
menuItem("VCF Filtering", tabName = "filtering", icon = icon("filter")),
+ tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Summary Metrics"),
+ menuItem("Genomic Diversity", tabName = "diversity", icon = icon("chart-pie")),
tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Population Structure"),
menuItem("PCA", tabName = "pca", icon = icon("chart-simple")),
menuItem("DAPC", tabName = "dapc", icon = icon("circle-nodes")),
- tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Summary Metrics"),
- menuItem("Genomic Diversity", tabName = "diversity", icon = icon("chart-pie")),
tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "GWAS"),
- menuItem("GWASpoly", tabName = "gwas", icon = icon("think-peaks")),
+ menuItem("GWASpoly", tabName = "gwas", icon = icon("think-peaks")),
tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Genomic Selection"),
menuItem(
- span("Predictive Ability", bs4Badge("beta", position = "right", color = "success")),
+ span("Predictive Ability"),
tabName = "prediction_accuracy",
icon = icon("right-left")),
menuItem(
- span("Genomic Prediction", bs4Badge("beta", position = "right", color = "success")),
+ span("Genomic Prediction"),
tabName = "prediction",
icon = icon("angles-right")),
tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Information"),
menuItem("Source Code", icon = icon("circle-info"), href = "https://www.github.com/Breeding-Insight/Genomics_Shiny_App"),
- menuItem(
- span("Job Queue", bs4Badge("demo", position = "right", color = "warning")),
- tabName = "slurm",
- icon = icon("clock")),
+ #menuItem(
+ # span("Job Queue", bs4Badge("demo", position = "right", color = "warning")),
+ # tabName = "slurm",
+ # icon = icon("clock")),
menuItem("Help", tabName = "help", icon = icon("circle-question"))
)
),
@@ -94,7 +104,7 @@ app_ui <- function(request) {
),
left = div(
style = "display: flex; align-items: center; height: 100%;", # Center the version text vertically
- "v0.6.1")
+ "v1.0.0")
),
dashboardBody(
disconnectMessage(), #Adds generic error message for any error if not already accounted for
diff --git a/R/mod_DosageCall.R b/R/mod_DosageCall.R
index f62191f..ed737b7 100644
--- a/R/mod_DosageCall.R
+++ b/R/mod_DosageCall.R
@@ -10,17 +10,28 @@
#' @importFrom shiny NS tagList
#' @importFrom future availableCores
#' @importFrom bs4Dash renderValueBox
+#' @import shinydisconnect
#'
#'
mod_DosageCall_ui <- function(id){
ns <- NS(id)
tagList(
fluidPage(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
fluidRow(
box(
title = "Inputs", status = "info", solidHeader = TRUE, collapsible = FALSE, collapsed = FALSE,
- fileInput(ns("madc_file"), "Choose MADC or VCF File", accept = c(".csv",".vcf",".gz")),
- fileInput(ns("madc_passport"), "Choose Passport File (optional)", accept = c(".csv")),
+ "* Required",
+ fileInput(ns("madc_file"), "Choose MADC or VCF File*", accept = c(".csv",".vcf",".gz")),
+ fileInput(ns("madc_passport"), "Choose Trait File", accept = c(".csv")),
conditionalPanel(
condition = "output.passportTablePopulated",
ns = ns,
@@ -88,19 +99,19 @@ mod_DosageCall_ui <- function(id){
p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- p(HTML("Updog tutorial:"), actionButton(ns("goUpdog"), icon("arrow-up-right-from-square", verify_fa = FALSE), onclick ="window.open('https://dcgerard.github.io/updog/', '_blank')" )),
+ p(HTML("Updog tutorial:"), actionButton(ns("goUpdog"), icon("arrow-up-right-from-square", verify_fa = FALSE), onclick ="window.open('https://dcgerard.github.io/updog/', '_blank')" )), hr(),
+ actionButton(ns("dosage_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "500px",
tooltip = tooltipOptions(title = "Click to see info!")
))
),
- valueBoxOutput(ns("MADCsnps"))
- ),
-
- fluidRow(
- box(title = "Status", width = 3, collapsible = TRUE, status = "info",
+ column(width=4,
+ valueBoxOutput(ns("MADCsnps"), width=12),
+ box(title = "Status", width = 12, collapsible = TRUE, status = "info",
progressBar(id = ns("pb_madc"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
+ )
)
)
)
@@ -160,7 +171,7 @@ mod_DosageCall_server <- function(input, output, session, parent_session){
# Update dropdown menu choices based on uploaded passport file
passport_table <- reactive({
validate(
- need(!is.null(input$madc_passport), "Upload passport file to access results in this section."),
+ need(!is.null(input$madc_passport), "Upload Trait File to access results in this section."),
)
info_df <- read.csv(input$madc_passport$datapath, header = TRUE, check.names = FALSE)
info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric
@@ -363,6 +374,25 @@ mod_DosageCall_server <- function(input, output, session, parent_session){
return()
}
+
+ if (nrow(matrices$ref_matrix) == 0 || nrow(matrices$size_matrix) == 0) {
+ shinyalert(
+ title = "Data Warning!",
+ text = "All markers are missing read count information for reference and alternate alleles",
+ size = "s",
+ closeOnEsc = TRUE,
+ closeOnClickOutside = FALSE,
+ html = TRUE,
+ type = "error",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192",
+ showCancelButton = FALSE,
+ animation = TRUE
+ )
+
+ return()
+ }
#Run Updog
#I am also taking the ploidy from the max value in the
@@ -392,7 +422,8 @@ mod_DosageCall_server <- function(input, output, session, parent_session){
output$download_updog_vcf <- downloadHandler(
filename = function() {
- paste0(input$output_name, ".vcf.gz")
+ output_name <- gsub("\\.vcf$", "", input$output_name)
+ paste0(output_name, ".vcf.gz")
},
content = function(file) {
#Save Updog output as VCF file
@@ -428,6 +459,68 @@ mod_DosageCall_server <- function(input, output, session, parent_session){
ex <- system.file("iris_DArT_MADC.csv", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ dosage_summary_info <- function() {
+ #Handle possible NULL values for inputs
+ genotype_file_name <- if (!is.null(input$madc_file$name)) input$madc_file$name else "No file selected"
+ report_file_name <- if (!is.null(input$madc_passport$name)) input$madc_passport$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$ploidy)) as.character(input$ploidy) else "Not selected"
+
+ #Print the summary information
+ cat(
+ "BIGapp Dosage Calling Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", genotype_file_name), "\n",
+ paste("Input Passport File:", report_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("Selected Updog Model:", input$updog_model), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("BIGr:", packageVersion("BIGr")), "\n",
+ paste("Updog:", packageVersion("updog")), "\n",
+ paste("dplyr:", packageVersion("dplyr")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$dosage_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_dosage_info", "Download")
+ ),
+ pre(
+ paste(capture.output(dosage_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_dosage_info <- downloadHandler(
+ filename = function() {
+ paste("DosageCalling_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(dosage_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R
index 7225f6c..511bcc2 100644
--- a/R/mod_Filtering.R
+++ b/R/mod_Filtering.R
@@ -13,20 +13,30 @@
#' @importFrom shinyjs enable disable useShinyjs
#'
#' @import dplyr
+#' @import shinydisconnect
#'
#'
mod_Filtering_ui <- function(id){
ns <- NS(id)
tagList(
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
box(width = 12,
title = "Quality Filtering", status = "info", solidHeader = TRUE, collapsible = TRUE, collapsed = FALSE,
fileInput(ns("updog_rdata"),"Choose VCF File", accept = c(".vcf",".gz")),
textInput(ns("filter_output_name"), "Output File Name"),
- numericInput(ns("filter_ploidy"),"Ploidy", min = 0, value = NULL),
+ numericInput(ns("filter_ploidy"),"Species Ploidy", min = 0, value = NULL),
numericInput(ns("filter_maf"),"MAF filter", min = 0, max=1, value = 0.05, step = 0.01),
- sliderInput(ns("size_depth"),"Min Read Depth (Marker per Sample)", min = 0, max = 300, value = 10, step = 1),
+ numericInput(ns("size_depth"),"Min Read Depth (Marker per Sample)", min = 0, max = 300, value = 10, step = 1),
numericInput(ns("snp_miss"),"Remove SNPs with >= % missing data", min = 0, max = 100, value = 50, step = 1),
numericInput(ns("sample_miss"),"Remove Samples with >= % missing data", min = 0, max = 100, value = 50, step = 1),
"Updog Filtering Parameters",
@@ -40,16 +50,16 @@ mod_Filtering_ui <- function(id){
numericInput(ns("maxpostprob_filter"), "Minimum maxpostprob (Updog filter)", min = 0, value = 0.5, step = 0.1)
)
),
- actionButton(ns("run_filters"), "Apply filters"),
+ actionButton(ns("run_filters"), "Apply Filters"),
useShinyjs(),
- downloadButton(ns("start_updog_filter"), "Download Filtered VCF", icon = icon("download"), class = "butt"),
+ downloadButton(ns("start_updog_filter"), "Download", icon = icon("download"), class = "butt"),
div(style="display:inline-block; float:right",dropdownButton(
- tags$h3("Updog Filter Parameters"),
- "You can download examples of the expected file here: \n",
- downloadButton(ns('download_vcf'), "Download VCF Example File"),
- # "Add description of each filter. Presently, all filtering parameters that are typically used for processing
- # a VCF file from Updog dosage calling are included. If a VCF file does not contain these values, it will only be
- # filtered for read depth, missing data, and maf.",
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("filtering_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
@@ -102,6 +112,43 @@ mod_Filtering_ui <- function(id){
mod_Filtering_server <- function(input, output, session, parent_session){
ns <- session$ns
+
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "VCF_Filtering_tabset",
+ selected = "VCF_Filtering_par")
+ # expand specific box
+ updateBox(id = "VCF_Filtering_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "VCF_Filtering_tabset",
+ selected = "VCF_Filtering_results")
+ # expand specific box
+ updateBox(id = "VCF_Filtering_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "VCF_Filtering_tabset",
+ selected = "VCF_Filtering_cite")
+ # expand specific box
+ updateBox(id = "VCF_Filtering_box", action = "toggle", session = parent_session)
+ })
#vcf
filtering_files <- reactiveValues(
@@ -318,7 +365,8 @@ mod_Filtering_server <- function(input, output, session, parent_session){
#Updog filtering
output$start_updog_filter <- downloadHandler(
filename = function() {
- paste0(input$filter_output_name, ".vcf.gz")
+ output_name <- gsub("\\.vcf$", "", input$filter_output_name)
+ paste0(output_name, ".vcf.gz")
},
content = function(file) {
@@ -623,6 +671,74 @@ mod_Filtering_server <- function(input, output, session, parent_session){
ex <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ filtering_summary_info <- function() {
+ #Handle possible NULL values for inputs
+ genotype_file_name <- if (!is.null(input$updog_rdata$name)) input$updog_rdata$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$filter_ploidy)) as.character(input$filter_ploidy) else "Not selected"
+
+ #Print the summary information
+ cat(
+ "BIGapp VCF Filtering Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste(R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", genotype_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("MAF Filter:", input$filter_maf), "\n",
+ paste("Min Read Depth (Marker per Sample):", input$size_depth), "\n",
+ paste("Remove SNPs with >= % missing data:", input$snp_miss), "\n",
+ paste("Remove Samples with >= % missing data:", input$sample_miss), "\n",
+ paste("Use Updog Filtering Parameters?:", input$use_updog), "\n",
+ paste("Max OD (Updog filter):", ifelse(input$use_updog,input$OD_filter, "NA")), "\n",
+ paste("Bias Minimum (Updog filter):", ifelse(input$use_updog,input$Bias[1], "NA")), "\n",
+ paste("Bias Maximum (Updog filter):", ifelse(input$use_updog,input$Bias[2], "NA")), "\n",
+ paste("Max Prop_mis (Updog filter):", ifelse(input$use_updog,input$Prop_mis,"NA")), "\n",
+ paste("Minimum maxpostprob (Updog filter):", ifelse(input$use_updog,input$maxpostprob_filter,"NA")), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("BIGr:", packageVersion("BIGr")), "\n",
+ paste("Updog:", packageVersion("updog")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$filtering_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_filtering_info", "Download")
+ ),
+ pre(
+ paste(capture.output(filtering_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_filtering_info <- downloadHandler(
+ filename = function() {
+ paste("Filtering_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(filtering_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_GS.R b/R/mod_GS.R
index ee9c9fd..cf65eb8 100644
--- a/R/mod_GS.R
+++ b/R/mod_GS.R
@@ -12,21 +12,31 @@
#' @importFrom bs4Dash valueBox
#' @importFrom shiny NS tagList
#' @importFrom shinyWidgets virtualSelectInput progressBar
+#' @import shinydisconnect
#'
#'
mod_GS_ui <- function(id){
ns <- NS(id)
tagList(
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE,
- fileInput(ns("pred_known_file"), "Choose Training VCF File", accept = c(".csv",".vcf",".gz")),
- fileInput(ns("pred_trait_file"), "Choose Passport File", accept = ".csv"),
- fileInput(ns("pred_est_file"), "Choose Prediction VCF File", accept = c(".csv",".vcf",".gz")),
- numericInput(ns("pred_est_ploidy"), "Species Ploidy", min = 1, value = NULL),
+ "* Required",
+ fileInput(ns("pred_known_file"), "Choose VCF File*", accept = c(".csv",".vcf",".gz")),
+ fileInput(ns("pred_trait_file"), "Choose Trait File*", accept = ".csv"),
+ numericInput(ns("pred_est_ploidy"), "Species Ploidy*", min = 1, value = NULL),
virtualSelectInput(
inputId = ns("pred_trait_info2"),
- label = "Select Trait (eg, Color):",
+ label = "Select Trait*",
choices = NULL,
showValueAsTags = TRUE,
search = TRUE,
@@ -34,35 +44,57 @@ mod_GS_ui <- function(id){
),
virtualSelectInput(
inputId = ns("pred_fixed_info2"),
- label = "Select Fixed Effects (optional) (not validated):",
+ label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")),
choices = NULL,
showValueAsTags = TRUE,
search = TRUE,
multiple = TRUE
),
+ conditionalPanel(
+ condition = "input.pred_fixed_info2.length > 0", ns = ns,
+ div(
+ "(unselected will be considered covariates)",
+ virtualSelectInput(
+ inputId = ns("pred_fixed_cat2"),
+ label = "Select Categorical Fixed Effects",
+ choices = NULL,
+ showValueAsTags = TRUE,
+ search = TRUE,
+ multiple = TRUE
+ )
+ )
+ ),
actionButton(ns("prediction_est_start"), "Run Analysis"),
div(style="display:inline-block; float:right",dropdownButton(
- tags$h3("GP Parameters"),
- "You can download examples of the expected input input files here: \n",
- downloadButton(ns('download_vcft'), "Download Training VCF Example File"),
- downloadButton(ns('download_pheno'), "Download Passport Example File"),
- downloadButton(ns('download_vcfp'), "Download Prediction VCF Example File"),
-
- #"GP uses the rrBLUP package: It can impute missing data, adapt to different ploidy, perform 5-fold cross validations with different number of iterations, run multiple traits, and accept multiple fixed effects.",
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(downloadButton(ns('download_pheno'),""), "Trait Example File"),
+ p(downloadButton(ns('download_vcfp'), ""), "Download Prediction VCF Example File"),hr(),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("pred_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
tooltip = tooltipOptions(title = "Click to see info!")
- ))
+ )),
+ tags$hr(style="border-color: #d3d3d3; margin-top: 20px; margin-bottom: 20px;"), # Lighter grey line
+ div(style="text-align: left; margin-top: 10px;",
+ actionButton(ns("advanced_options_pred"),
+ label = HTML(paste(icon("cog", style = "color: #007bff;"), "Advanced Options (beta)")),
+ style = "background-color: transparent; border: none; color: #007bff; font-size: smaller; text-decoration: underline; padding: 0;"
+ )
+ )
)
),
column(width = 6,
- box(title = "Results", status = "info", solidHeader = FALSE, width = 12, height = 600,
+ box(title = "Results", status = "info", solidHeader = FALSE, width = 12, height = 600, maximizable = T,
bs4Dash::tabsetPanel(
- tabPanel("Predicted Trait Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"),
- tabPanel("GEBVs Table", DTOutput(ns("pred_gebvs_table2")),style = "overflow-y: auto; height: 500px")
+ tabPanel("Predicted Pheno Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"),
+ tabPanel("EBVs Table", DTOutput(ns("pred_gebvs_table2")),style = "overflow-y: auto; height: 500px")
)
)
@@ -71,7 +103,7 @@ mod_GS_ui <- function(id){
column(width = 3,
valueBoxOutput("shared_snps", width = NULL),
box(title = "Status", width = 12, collapsible = TRUE, status = "info",
- progressBar(id = "pb_gp", value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
+ progressBar(id = ns("pb_gp"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
),
box(title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE, width = 12,
div(style="display:inline-block; float:left",dropdownButton(
@@ -106,6 +138,136 @@ mod_GS_ui <- function(id){
mod_GS_server <- function(input, output, session, parent_session){
ns <- session$ns
+
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset",
+ selected = "Genomic_Prediction_par")
+ # expand specific box
+ updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset",
+ selected = "Genomic_Prediction_results")
+ # expand specific box
+ updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset",
+ selected = "Genomic_Prediction_cite")
+ # expand specific box
+ updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
+ })
+
+ #Default model choices
+ advanced_options_pred <- reactiveValues(
+ pred_model = "GBLUP",
+ pred_matrix = "Gmatrix",
+ pred_est_file = NULL,
+ ped_file = NULL
+ )
+
+ pred_outputs <- reactiveValues(corr_output = NULL,
+ comb_output = NULL,
+ all_GEBVs = NULL,
+ avg_GEBVs = NULL)
+
+ #List the ped file name if previously uploaded
+ output$uploaded_file_name <- renderText({
+ if (!is.null(advanced_options_pred$ped_file)) {
+ paste("Previously uploaded file:", advanced_options_pred$ped_file$name)
+ } else {
+ "" # Return an empty string if no file has been uploaded
+ }
+ })
+
+ output$uploaded_file_name_pred <- renderText({
+ if (!is.null(advanced_options_pred$pred_est_file)) {
+ paste("Previously uploaded file:", advanced_options_pred$pred_est_file$name)
+ } else {
+ "" # Return an empty string if no file has been uploaded
+ }
+ })
+
+ #UI popup window for input
+ observeEvent(input$advanced_options_pred, {
+ showModal(modalDialog(
+ title = "Advanced Options (beta)",
+ selectInput(
+ inputId = ns('pred_model'),
+ label = 'Method Choice',
+ choices = c("GBLUP"),
+ selected = advanced_options_pred$pred_model # Initialize with stored value
+ ),
+ conditionalPanel(
+ condition = "input.pred_model == 'GBLUP'", ns = ns,
+ div(
+ selectInput(
+ inputId = ns('pred_matrix'),
+ label = 'Relationship Matrix Choice',
+ #choices = c("Gmatrix", "Amatrix", "Hmatrix"),
+ choices = c("Gmatrix"),
+ selected = advanced_options_pred$pred_matrix # Initialize with stored value
+ )
+ )
+ ),
+ conditionalPanel(
+ condition = "input.pred_matrix != 'Amatrix'", ns = ns,
+ div(
+ fileInput(ns("pred_est_file"), "Choose Prediction VCF", accept = c(".vcf",".gz")),
+ conditionalPanel(
+ condition = "output.uploaded_file_name_pred !== ''", # Show only if there's content
+ textOutput(ns("uploaded_file_name_pred")) # Display the uploaded file name
+ )
+ )
+ ),
+ conditionalPanel(
+ condition = "input.pred_matrix != 'Gmatrix'", ns = ns,
+ div(
+ fileInput(ns("ped_file"), "Choose Pedigree File", accept = ".csv"),
+ conditionalPanel(
+ condition = "output.uploaded_file_name !== ''", # Show only if there's content
+ textOutput(ns("uploaded_file_name")) # Display the uploaded file name
+ )
+ )
+ ),
+ footer = tagList(
+ modalButton("Close"),
+ actionButton(ns("save_advanced_options_pred"), "Save")
+ )
+ ))
+ })
+
+
+
+ #Close popup window when user "saves options"
+ observeEvent(input$save_advanced_options_pred, {
+ advanced_options_pred$pred_model <- input$pred_model
+ advanced_options_pred$pred_matrix <- input$pred_matrix
+ advanced_options_pred$pred_est_file <- input$pred_est_file
+ advanced_options_pred$ped_file <- input$ped_file
+ # Save other inputs as needed
+
+ removeModal() # Close the modal after saving
+ })
+
###Genomic Prediction
#This tab involved 3 observeEvents
#1) to get the traits listed in the phenotype file
@@ -118,12 +280,12 @@ mod_GS_server <- function(input, output, session, parent_session){
info_df2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrow = 0)
trait_var2 <- colnames(info_df2)
trait_var2 <- trait_var2[2:length(trait_var2)]
- #updateSelectInput(session, "pred_trait_info", choices = c("All", trait_var))
updateVirtualSelect("pred_fixed_info2", choices = trait_var2, session = session)
updateVirtualSelect("pred_trait_info2", choices = trait_var2, session = session)
-
- #output$passport_table <- renderDT({info_df}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 4)
- #)
+ })
+
+ observeEvent(input$pred_fixed_info2, {
+ updateVirtualSelect("pred_fixed_cat2", choices = input$pred_fixed_info2, session = session)
})
#2) Error check for prediction and save input files
@@ -134,7 +296,8 @@ mod_GS_server <- function(input, output, session, parent_session){
est_geno_input = NULL,
shared_snps = NULL,
pred_genos = NULL,
- pred_geno_pheno = NULL
+ pred_geno_pheno = NULL,
+ matrix = NULL
)
pred_outputs2 <- reactiveValues(
@@ -163,7 +326,7 @@ mod_GS_server <- function(input, output, session, parent_session){
toggleClass(id = "pred_est_ploidy", class = "borderred", condition = (is.na(input$pred_est_ploidy) | is.null(input$pred_est_ploidy)))
- if (is.null(input$pred_known_file$datapath) | is.null(input$pred_est_file$datapath) | is.null(input$pred_trait_file$datapath)) {
+ if (is.null(input$pred_known_file$datapath) | is.null(input$pred_trait_file$datapath)) {
shinyalert(
title = "Missing input!",
text = "Upload VCF and phenotype files",
@@ -179,7 +342,7 @@ mod_GS_server <- function(input, output, session, parent_session){
animation = TRUE
)
}
- req(input$pred_known_file$datapath, input$pred_est_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy)
+ req(input$pred_known_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy)
#Status
updateProgressBar(session = session, id = "pb_gp", value = 5, title = "Checking input files")
@@ -200,7 +363,7 @@ mod_GS_server <- function(input, output, session, parent_session){
# If condition is met, show notification toast
shinyalert(
- title = "Oops",
+ title = "Missing input!",
text = "No traits were selected",
size = "xs",
closeOnEsc = TRUE,
@@ -252,19 +415,27 @@ mod_GS_server <- function(input, output, session, parent_session){
}
#Convert VCF file if submitted
- train_vcf <- vcfR::read.vcfR(train_geno_path)
- est_vcf <- vcfR::read.vcfR(est_geno_path)
+ train_vcf <- vcfR::read.vcfR(train_geno_path, verbose = FALSE)
+ if (is.null(est_geno_path) || is.na(est_geno_path)){
+ est_vcf <- NULL
+ } else {
+ est_vcf <- vcfR::read.vcfR(est_geno_path, verbose = FALSE)
+ }
#Get number of SNPs
#pred_inputs$pred_snps <- nrow(vcf)
#Extract GT
+ if (is.null(est_vcf)) {
+ est_geno <- NULL
+ } else {
+ est_geno <- extract.gt(est_vcf, element = "GT")
+ est_geno <- apply(est_geno, 2, convert_to_dosage)
+ class(est_geno) <- "numeric"
+ }
train_geno <- extract.gt(train_vcf, element = "GT")
train_geno <- apply(train_geno, 2, convert_to_dosage)
- est_geno <- extract.gt(est_vcf, element = "GT")
- est_geno <- apply(est_geno, 2, convert_to_dosage)
class(train_geno) <- "numeric"
- class(est_geno) <- "numeric"
rm(train_vcf)
rm(est_vcf)
@@ -272,7 +443,7 @@ mod_GS_server <- function(input, output, session, parent_session){
# If condition is met, show notification toast
shinyalert(
- title = "Oops",
+ title = "File Error",
text = "No valid genotype file detected",
size = "xs",
closeOnEsc = TRUE,
@@ -291,9 +462,6 @@ mod_GS_server <- function(input, output, session, parent_session){
return()
}
- #Save number of samples in file
- #pred_inputs$pred_genos <- ncol(geno)
-
#Check that the ploidy entered is correct
if (ploidy != max(train_geno, na.rm = TRUE)) {
# If condition is met, show notification toast
@@ -329,7 +497,11 @@ mod_GS_server <- function(input, output, session, parent_session){
#tranforming genotypes
train_geno_adj_init <- convert_genotype(train_geno, as.numeric(ploidy))
- est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy))
+ if (is.null(est_geno)) {
+ est_geno_adj_init <- NULL
+ } else {
+ est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy))
+ }
#Make sure the trait file and genotype file are in the same order
# Column names for geno (assuming these are the individual IDs)
@@ -346,8 +518,8 @@ mod_GS_server <- function(input, output, session, parent_session){
# If condition is met, show notification toast
shinyalert(
- title = "Oops",
- text = "All samples were missing from the phenotype file",
+ title = "Data Mismatch",
+ text = "All genotyped samples were missing from the phenotype file",
size = "xs",
closeOnEsc = TRUE,
closeOnClickOutside = FALSE,
@@ -368,8 +540,8 @@ mod_GS_server <- function(input, output, session, parent_session){
} else if (length(common_ids) < length(colnames_geno)) {
# If condition is met, show notification toast
shinyalert(
- title = "Data Mismatch",
- text = paste0((length(colnames_geno)-length(common_ids))," samples were removed for not having trait information"),
+ title = "Data Check",
+ text = paste0((length(common_ids))," samples in VCF File have trait information"),
size = "xs",
closeOnEsc = FALSE,
closeOnClickOutside = FALSE,
@@ -420,32 +592,45 @@ mod_GS_server <- function(input, output, session, parent_session){
}
}
)
-
-
- # Subset and reorder geno and pheno to ensure they only contain and are ordered by common IDs
- train_geno_adj <- train_geno_adj_init[, common_ids] # Assuming that the columns can be directly indexed by IDs
- pheno2 <- pheno2[match(common_ids, ids_pheno), ]
+
+ #Status
+ updateProgressBar(session = session, id = "pb_gp", value = 40, title = "Generating Matrices")
+
+ #Create relationship matrix depending on the input VCF files
+ if (is.null(advanced_options_pred$pred_est_file)) {
+ #Subset phenotype file by common IDs
+ pheno2 <- pheno2[common_ids, ]
+
+ #Matrix
+ Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE)
+
+ } else{
+ #Subset phenotype file and training file by common IDs
+ pheno2 <- pheno2[common_ids, ]
+
+ #Match training and testing genotype file SNPs
+ common_markers <- intersect(rownames(train_geno_adj_init), rownames(est_geno_adj_init))
+ #first remove samples from training genotype matrix that are not in the phenotype file
+ train_geno_adj <- train_geno_adj_init[common_markers, common_ids]
+ #Merge the training and prediction genotype matrices
+ est_geno_adj_init <- est_geno_adj_init[common_markers, ]
+ train_pred_geno <- cbind(train_geno_adj, est_geno_adj_init)
+
+ #Matrix
+ Kin_mat <- A.mat(t(train_pred_geno), max.missing=NULL,impute.method="mean",return.imputed=FALSE)
+
+ }
#Save to reactive values
+ #pred_inputs2$shared_snps <- length(common_markers)
+ pred_inputs2$matrix <- Kin_mat
pred_inputs2$pheno_input <- pheno2
- #pred_inputs$geno_adj_input <- geno_adj
-
- #Match training and testing genotype file SNPs
- common_markers <- intersect(rownames(train_geno_adj), rownames(est_geno_adj_init))
- train_geno_adj <- train_geno_adj[common_markers, ]
- est_geno_adj_init <- est_geno_adj_init[common_markers, ]
-
- #Save to reactive values
- pred_inputs2$shared_snps <- length(common_markers)
- pred_inputs2$train_geno_input <- train_geno_adj
- pred_inputs2$est_geno_input <- est_geno_adj_init
-
})
#3) Analysis only proceeds once continue_prediction is converted to TRUE
observe({
- req(continue_prediction2(),pred_inputs2$pheno_input, pred_inputs2$train_geno_input)
+ req(continue_prediction2(),pred_inputs2$pheno_input, pred_inputs2$matrix)
# Stop analysis if cancel was selected
if (isFALSE(continue_prediction2())) {
@@ -454,220 +639,80 @@ mod_GS_server <- function(input, output, session, parent_session){
#Variables
ploidy <- as.numeric(input$pred_est_ploidy)
- train_geno_adj <- pred_inputs2$train_geno_input
- est_geno_adj <- pred_inputs2$est_geno_input
- pheno <- pred_inputs2$pheno_input
+ gmat <- pred_inputs2$matrix
+ pheno2 <- pred_inputs2$pheno_input
traits <- input$pred_trait_info2
- #CVs <- as.numeric(input$pred_cv)
- #train_perc <- as.numeric(input$pred_folds)
- fixed_traits <- input$pred_fixed_info2
- cores <- input$pred_cores
-
- ##Need to add ability for the use of parallelism for the for cross-validation
- ##Example at this tutorial also: https://www.youtube.com/watch?v=ARWjdQU6ays
-
- # Function to perform genomic prediction
- ##Make sure this is correct (I think I need to be generating a relationship matrix A.mat() to account for missing data, but I am not sure how that works with this)
- genomic_prediction2 <- function(train_geno,est_geno, Pheno, traits, fixed_effects = NULL, cores = 1) {
-
- # Define variables
- traits <- traits
- #cycles <- as.numeric(Iters)
- #Folds <- as.numeric(Fold)
- total_population <- ncol(train_geno)
- #train_size <- floor(percentage / 100 * total_population)
- fixed_traits <- fixed_effects
- cores <- as.numeric(cores)
-
- # Initialize a list to store GEBVs for all traits and cycles
- GEBVs <- list()
-
- #Cross validation number for progress bar (not involved in the calculations, just shiny visuals)
- pb_value = 10
-
- #Remove the fixed traits from the Pheno file
- if (length(fixed_traits) == 0) {
- Pheno <- Pheno
- } else {
- #Subset fixed traits
- Fixed <- subset(Pheno, select = fixed_traits)
-
- #Pheno <- subset(Pheno, select = -fixed_traits)
- convert_all_to_factor_if_not_numeric <- function(df) {
- for (col in names(df)) {
- if (!is.numeric(df[[col]]) && !is.integer(df[[col]])) {
- df[[col]] <- as.factor(df[[col]])
- }
- }
- return(df)
- }
- # Convert all columns to factor if they are not numeric or integer
- Fixed <- convert_all_to_factor_if_not_numeric(Fixed)
-
- #Fixed <- as.data.frame(lapply(Fixed, as.factor)) #convert to factor
- row.names(Fixed) <- row.names(Pheno)
-
- #Make the matrix
- formula_str <- paste("~", paste(fixed_traits, collapse = " + "))
- formula <- as.formula(formula_str)
-
- # Create the design matrix using the constructed formula
- Fixed <- model.matrix(formula, data = Fixed)
- }
-
- #Make kinship matrix of all individuals?
- #Kin_mat <- A.mat(t(geno), n.core = 1) ##Need to explore whether or not to use a Kinship matrix and if it makes a significant improvement to accuracy
- #If wanting to use Kkinship matrix, will then need to see how to implement it here
-
- #For now, I am just imputing the missing sites using mean, but EM is more accurate, but slower (can use multiple cores).
- impute = (A.mat(t(train_geno), max.missing=1,impute.method="mean",return.imputed=TRUE))
- train_geno <- impute$imputed
- impute = (A.mat(t(est_geno), max.missing=1,impute.method="mean",return.imputed=TRUE))
- est_geno <- impute$imputed
-
- #Match training and testing genotype file SNPs
- common_markers <- intersect(colnames(train_geno), colnames(est_geno))
- train_geno <- train_geno[ ,common_markers]
- est_geno <- est_geno[ ,common_markers]
-
- #Calculate predicted traits and GEBVs
- #fold_ids <- sample(rep(1:Folds, length.out = total_population))
- #fold_df <- data.frame(Sample = row.names(geno), FoldID = fold_ids) #Randomly assign each sample to a fold
- #fold_results <- matrix(nrow = Folds, ncol = length(traits))
- #colnames(fold_results) <- traits
-
- #Status
- updateProgressBar(session = session, id = "pb_gp", value = 50, title = "Estimating Predicted Values")
-
- train <- row.names(train_geno)
-
- #Subset datasets
- #if (length(fixed_traits) == 0) {
- # Fixed_train = NULL
- #} else{
- # Fixed_train <- data.frame(Fixed[train, ])
- # Fixed_train <- as.matrix(Fixed_train)
- # row.names(Fixed_train) <- train
- #colnames(Fixed_train) <- colnames(Fixed)
- Fixed_train = NULL
-
- #Fixed (testing)
- # Fixed_test<- data.frame(Fixed[test, ])
- # Fixed_test <- as.matrix(Fixed_test)
- # row.names(Fixed_test) <- test
- #colnames(Fixed_test) <- colnames(Fixed)
-
- Pheno_train <- Pheno[train, ] # Subset the phenotype df to only retain the relevant samples from the training set
- m_train <- train_geno
- #Pheno_test <- Pheno[test, ]
- #Fixed_test <- Fixed[test, ] #Where would the Fixed_test be used?
- m_valid <- est_geno
-
- print(dim(m_train))
- print(dim(m_valid))
-
- # Initialize a matrix to store GEBVs for this fold
- GEBVs_fold <- matrix(nrow = nrow(est_geno), ncol = length(traits))
- colnames(GEBVs_fold) <- c(traits)
- rownames(GEBVs_fold) <- row.names(est_geno)
-
- Pred_results <- matrix(nrow = nrow(est_geno), ncol = length(traits))
- colnames(Pred_results) <- c(traits)
- rownames(Pred_results) <- row.names(est_geno)
-
- #Evaluate each trait using the same train and testing samples for each
- for (trait_idx in 1:length(traits)) {
- trait <- Pheno_train[, traits[trait_idx]] # Get the trait of interest
- trait_answer <- mixed.solve(y= trait, Z = m_train, K = NULL, X = Fixed_train, SE = FALSE, return.Hinv = FALSE)
- TRT <- trait_answer$u
- e <- as.matrix(TRT)
- pred_trait_test <- m_valid %*% e
- pred_trait <- pred_trait_test[, 1] + c(trait_answer$beta) # Make sure this still works when using multiple traits
- Pred_results[, trait_idx] <- pred_trait #save to dataframe
-
- # Extract GEBVs
- # Check if Fixed_train is not NULL and include beta if it is
- if (!is.null(Fixed_train) && !is.null(trait_answer$beta)) {
- # Calculate GEBVs including fixed effects
- #GEBVs_fold[, trait_idx] <- m_train %*% trait_answer$u + Fixed_train %*% matrix(trait_answer$beta, nrow = length(trait_answer$beta), ncol = 1)
- #GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u + Fixed_test %*% matrix(trait_answer$beta, nrow = length(trait_answer$beta), ncol = 1)
- GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u + Fixed_test %*% trait_answer$beta
- } else {
- # Calculate GEBVs without fixed effects
- #GEBVs_fold[, trait_idx] <- m_train %*% trait_answer$u
- GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u #Confirm it is accuract to calculate the GEBVs for testing group from the trained model
- }
-
- # Calculate heritability for the current trait
- #Vu <- trait_answer$Vu
- #Ve <- trait_answer$Ve
- #heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve)
-
- }
- #Add iter and fold information for each trait/result
- #heritability_scores[(((r-1)*5)+fold), (length(traits)+1)] <- r
- #heritability_scores[(((r-1)*5)+fold), (length(traits)+2)] <- fold
-
- #Add sample, iteration, and fold information to GEBVs_fold
- #GEBVs_fold[,"Iter"] = r
- #GEBVs_fold[,"Fold"] = fold
- #GEBVs_fold[,"Sample"] <- row.names(est_geno)
-
- # Store GEBVs for this fold
- GEBVs_df <- data.frame(GEBVs_fold)
-
- Pred_results <- data.frame(Pred_results)
-
-
- # Store GEBVs for this cycle
- #GEBVs[[r]] <- do.call(rbind, GEBVs_cycle)
-
-
- # Combine all GEBVs into a single DataFrame
- #GEBVs_df <- as.data.frame(do.call(rbind, GEBVs))
-
- #results <- as.data.frame(results)
- #heritability_scores <- as.data.frame(heritability_scores)
-
- # Combine results and heritability_scores using cbind
- #combined_results <- cbind(results, heritability_scores)
-
- return(list(GEBVs = GEBVs_df, Predictions = Pred_results))
+ # Assign fixed_cat based on input$pred_fixed_cat2
+ if (!is.null(input$pred_fixed_cat2) && length(input$pred_fixed_cat2) > 0) {
+ fixed_cat <- input$pred_fixed_cat2
+ } else {
+ fixed_cat <- NULL
}
-
- # Example call to the function
- #This is slow when using 3k markers and 1.2k samples...will need to parallelize if using this script...
- results <- genomic_prediction2(train_geno_adj, est_geno_adj, pheno, traits = traits, fixed_effects = fixed_traits, cores = cores)
-
- #With fixed effects (need to inforporate the ability for fixed effects into the prediction?)
- #results <- genomic_prediction(geno_matrix, phenotype_df, c("height", "weight"), "~ age + sex")
-
- #Save to reactive value
- pred_outputs2$trait_output <- results$Predictions
- pred_outputs2$all_GEBVs <- results$GEBVs
- #TESTING!!!
- #write.csv(results$GEBVs, "GEBVs_test.csv")
-
- # Convert trait columns to numeric
- results$GEBVs <- results$GEBVs %>%
- mutate(across(all_of(traits), ~ as.numeric(.x)))
-
-
- #Get average accuracy and h2 for each iter accross the 5 folds
-
- #columns <- setdiff(colnames(results$CombinedResults), c("Iter","Fold"))
- #average_accuracy_df <- results$CombinedResults %>%
- # group_by(Iter) %>%
- # summarize(across(all_of(columns), mean, na.rm = TRUE))
-
+ # Assign fixed_cov based on conditions
+ if (!is.null(fixed_cat) && length(fixed_cat) == length(input$pred_fixed_info2)) {
+ fixed_cov <- NULL
+ } else if (length(input$pred_fixed_info2) > 0 && is.null(fixed_cat)) {
+ fixed_cov <- input$pred_fixed_info2
+ } else {
+ fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2)
+ }
+ #fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2)
+ cores <- 1
+ total_population <- ncol(gmat)
+ #train_size <- floor(percentage / 100 * total_population)
#Status
updateProgressBar(session = session, id = "pb_gp", value = 90, title = "Generating Results")
-
- ##Figures and Tables
-
+
+ #initialize dataframe
+ results_GEBVs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1)
+ results_TRAITs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1)
+ colnames(results_TRAITs) <- c("Sample",paste0(traits)) # Set the column names to be the traits
+ colnames(results_GEBVs) <- c("Sample",paste0(traits)) # Set the column names to be the traits
+
+ #GBLUP for each trait
+ for (trait_idx in 1:length(traits)) {
+ traitpred <- kin.blup(data = pheno2,
+ geno = colnames(pheno2)[1],
+ pheno = traits[trait_idx],
+ fixed = fixed_cat,
+ covariate = fixed_cov,
+ K=gmat,
+ n.core = cores)
+
+ results_GEBVs[, (trait_idx+1)] <- traitpred$g
+ results_TRAITs[, (trait_idx+1)] <- traitpred$pred
+ }
+ #Organize dataframes
+ results_GEBVs[,1] <- row.names(data.frame(traitpred$g))
+ results_TRAITs[,1] <- row.names(data.frame(traitpred$pred))
+
+ #Label the samples that already had phenotype information
+ results_GEBVs <- data.frame(results_GEBVs)
+ results_TRAITs <- data.frame(results_TRAITs)
+ exists_in_df <- results_GEBVs[[1]] %in% pheno2[[1]]
+ results_GEBVs <- cbind(results_GEBVs[1], "w/Pheno" = exists_in_df, results_GEBVs[-1])
+ results_TRAITs <- cbind(results_TRAITs[1], "w/Pheno" = exists_in_df, results_TRAITs[-1])
+
#Status
updateProgressBar(session = session, id = "pb_gp", value = 100, title = "Finished!")
+
+ ##Make output tables depending on 1 or 2 VCF/pedigree files used.
+ #GEBVs
+ if (!is.null(advanced_options_pred$pred_est_file)) {
+ # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column
+ pred_outputs2$all_GEBVs <- results_GEBVs[results_GEBVs$`w/Pheno` == FALSE, !names(results_GEBVs) %in% "w/Pheno"]
+ } else{
+ pred_outputs2$all_GEBVs <- results_GEBVs
+ }
+
+ #BLUPs of genetic values
+ if (!is.null(advanced_options_pred$pred_est_file)) {
+ # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column
+ pred_outputs2$trait_output <- results_TRAITs[results_TRAITs$`w/Pheno` == FALSE, !names(results_TRAITs) %in% "w/Pheno"]
+ } else{
+ pred_outputs2$trait_output <- results_TRAITs
+ }
#End the event
continue_prediction2(NULL)
@@ -720,6 +765,105 @@ mod_GS_server <- function(input, output, session, parent_session){
ex <- system.file("iris_passport_file.csv", package = "BIGapp")
file.copy(ex, file)
})
+
+ #Download files for GP
+ output$download_pred_results_file <- downloadHandler(
+ filename = function() {
+ paste0("Prediction-results-", Sys.Date(), ".zip")
+ },
+ content = function(file) {
+ # Temporary files list
+ temp_dir <- tempdir()
+ temp_files <- c()
+
+ # Create a temporary file for data frames
+ ebv_file <- file.path(temp_dir, paste0("GS-EBV-predictions-", Sys.Date(), ".csv"))
+ write.csv(pred_outputs2$all_GEBVs, ebv_file, row.names = FALSE)
+ temp_files <- c(temp_files, ebv_file)
+
+ trait_file <- file.path(temp_dir, paste0("GS-Phenotype-predictions-", Sys.Date(), ".csv"))
+ write.csv(pred_outputs2$trait_output, trait_file, row.names = FALSE)
+ temp_files <- c(temp_files, trait_file)
+
+ # Zip files only if there's something to zip
+ if (length(temp_files) > 0) {
+ zip(file, files = temp_files, extras = "-j") # Using -j to junk paths
+ }
+
+ # Optionally clean up
+ file.remove(temp_files)
+ }
+ )
+
+ ##Summary Info
+ pred_summary_info <- function() {
+ # Handle possible NULL values for inputs
+ dosage_file_name <- if (!is.null(input$pred_known_file$name)) input$pred_known_file$name else "No file selected"
+ est_file_name <- if (!is.null(input$pred_est_file$name)) input$pred_est_file$name else "No file selected"
+ passport_file_name <- if (!is.null(input$pred_trait_file$name)) input$pred_trait_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$pred_est_ploidy)) as.character(input$pred_est_ploidy) else "Not selected"
+
+ # Print the summary information
+ cat(
+ "BIGapp Selection Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File 1:", dosage_file_name), "\n",
+ paste("Input Genotype File 2:", est_file_name), "\n",
+ paste("Input Passport File:", passport_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("Selected Trait(s):", input$pred_trait_info2), "\n",
+ paste("Selected Fixed Effects:", input$pred_fixed_info2), "\n",
+ #paste("Selected Model:", input$pred_fixed_info2), "\n",
+ #paste("Selected Matrix:", input$pred_fixed_info2), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("AGHmatrix:", packageVersion("AGHmatrix")), "\n",
+ paste("ggplot2:", packageVersion("ggplot2")), "\n",
+ paste("rrBLUP:", packageVersion("rrBLUP")), "\n",
+ paste("vcfR:", packageVersion("vcfR")), "\n",
+ paste("dplyr:", packageVersion("dplyr")), "\n",
+ paste("tidyr:", packageVersion("tidyr")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$pred_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_pred_info", "Download")
+ ),
+ pre(
+ paste(capture.output(pred_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_pred_info <- downloadHandler(
+ filename = function() {
+ paste("pred_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(pred_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R
index 788f425..9df59b2 100644
--- a/R/mod_GSAcc.R
+++ b/R/mod_GSAcc.R
@@ -12,16 +12,26 @@
#' @importFrom bs4Dash valueBox
#' @importFrom shiny NS tagList
#' @importFrom shinyWidgets virtualSelectInput
+#' @import shinydisconnect
#'
mod_GSAcc_ui <- function(id){
ns <- NS(id)
tagList(
# Add GWAS content here
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE,
fileInput(ns("pred_file"), "Choose VCF File", accept = c(".csv",".vcf",".gz")),
- fileInput(ns("trait_file"), "Choose Passport File", accept = ".csv"),
+ fileInput(ns("trait_file"), "Choose Trait File", accept = ".csv"),
numericInput(ns("pred_ploidy"), "Species Ploidy", min = 1, value = NULL),
numericInput(ns("pred_cv"), "Iterations", min = 1, max=20, value = 5),
virtualSelectInput(
@@ -34,7 +44,7 @@ mod_GSAcc_ui <- function(id){
),
virtualSelectInput(
inputId = ns("pred_fixed_info"),
- label = "Select Fixed Effects (optional) (not validated):",
+ label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")),
choices = NULL,
showValueAsTags = TRUE,
search = TRUE,
@@ -42,10 +52,11 @@ mod_GSAcc_ui <- function(id){
),
conditionalPanel(
condition = "input.pred_fixed_info.length > 0", ns = ns,
+ "(unselected will be considered covariates)",
div(
virtualSelectInput(
inputId = ns("pred_fixed_cat"),
- label = "Select Categorical Fixed Effects (unselected will be considered covariates)",
+ label = "Select Categorical Fixed Effects",
choices = NULL,
showValueAsTags = TRUE,
search = TRUE,
@@ -55,10 +66,13 @@ mod_GSAcc_ui <- function(id){
),
actionButton(ns("prediction_start"), "Run Analysis"),
div(style="display:inline-block; float:right", dropdownButton(
- tags$h3("GP Parameters"),
- "You can download examples of the expected input input files here: \n",
- downloadButton(ns('download_vcf'), "Download VCF Example File"),
- downloadButton(ns('download_pheno'), "Download Passport Example File"),
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(downloadButton(ns('download_pheno'),""), "Trait Example File"), hr(),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("predAcc_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
@@ -76,11 +90,11 @@ mod_GSAcc_ui <- function(id){
column(width = 6,
box(
- title = "Plots", status = "info", solidHeader = FALSE, width = 12, height = 600,
+ title = "Plots", status = "info", solidHeader = FALSE, width = 12, height = 600, maximizable = T,
bs4Dash::tabsetPanel(
tabPanel("Violin Plot", plotOutput(ns("pred_violin_plot"), height = "500px")),
tabPanel("Box Plot", plotOutput(ns("pred_box_plot"), height = "500px")),
- tabPanel("Accuracy Table", DTOutput(ns("pred_acc_table")), style = "overflow-y: auto; height: 500px")
+ tabPanel("P.A. Table", DTOutput(ns("pred_acc_table")), style = "overflow-y: auto; height: 500px")
)
)
@@ -135,6 +149,43 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
ns <- session$ns
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Predictive_Ability_tabset",
+ selected = "Predictive_Ability_par")
+ # expand specific box
+ updateBox(id = "Predictive_Ability_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Predictive_Ability_tabset",
+ selected = "Predictive_Ability_results")
+ # expand specific box
+ updateBox(id = "Predictive_Ability_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Predictive_Ability_tabset",
+ selected = "Predictive_Ability_cite")
+ # expand specific box
+ updateBox(id = "Predictive_Ability_box", action = "toggle", session = parent_session)
+ })
+
#Default model choices
advanced_options <- reactiveValues(
pred_model = "GBLUP",
@@ -162,8 +213,8 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
title = "Advanced Options (beta)",
selectInput(
inputId = ns('pred_model'),
- label = 'Model Choice',
- choices = c("rrBLUP", "GBLUP"),
+ label = 'Method Choice',
+ choices = c("GBLUP"),
selected = advanced_options$pred_model # Initialize with stored value
),
conditionalPanel(
@@ -171,7 +222,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
div(
selectInput(
inputId = ns('pred_matrix'),
- label = 'GBLUP Matrix Choice',
+ label = 'Relationship Matrix Choice',
choices = c("Gmatrix", "Amatrix", "Hmatrix"),
selected = advanced_options$pred_matrix # Initialize with stored value
)
@@ -638,7 +689,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
facet_wrap(~ Trait, nrow = 1) + # Facet by trait, allowing different y-scales
labs(title = "Predictive Ability by Trait",
x = " ",
- y = "Pearson Correlation") +
+ y = "Predictive Ability") +
#theme_minimal() + # Using a minimal theme
theme(legend.position = "none",
strip.text = element_text(size = 12),
@@ -655,7 +706,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
facet_wrap(~ Trait, nrow = 1) + # Facet by trait, allowing different y-scales
labs(title = "Predictive Ability by Trait",
x = " ", # x-label is blank because it's not relevant per facet
- y = "Pearson Correlation") +
+ y = "Predictive Ability") +
theme(legend.position = "none",
strip.text = element_text(size = 12),
axis.text = element_text(size = 12),
@@ -783,6 +834,74 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
ex <- system.file("iris_passport_file.csv", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ predAcc_summary_info <- function() {
+ # Handle possible NULL values for inputs
+ dosage_file_name <- if (!is.null(input$pred_file$name)) input$pred_file$name else "No file selected"
+ passport_file_name <- if (!is.null(input$trait_file$name)) input$trait_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$pred_ploidy)) as.character(input$pred_ploidy) else "Not selected"
+
+ # Print the summary information
+ cat(
+ "BIGapp Selection Model CV Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", dosage_file_name), "\n",
+ paste("Input Passport File:", passport_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("Selected Trait(s):", input$pred_trait_info), "\n",
+ paste("Selected Fixed Effects:", input$pred_fixed_info), "\n",
+ paste("Selected Model:", advanced_options$pred_model), "\n",
+ paste("Selected Matrix:", advanced_options$pred_matrix), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("AGHmatrix:", packageVersion("AGHmatrix")), "\n",
+ paste("ggplot2:", packageVersion("ggplot2")), "\n",
+ paste("rrBLUP:", packageVersion("rrBLUP")), "\n",
+ paste("vcfR:", packageVersion("vcfR")), "\n",
+ paste("dplyr:", packageVersion("dplyr")), "\n",
+ paste("tidyr:", packageVersion("tidyr")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$predAcc_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_predAcc_info", "Download")
+ ),
+ pre(
+ paste(capture.output(predAcc_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_predAcc_info <- downloadHandler(
+ filename = function() {
+ paste("predAcc_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(predAcc_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_Home.R b/R/mod_Home.R
index 9dd0b8e..7e38e2a 100644
--- a/R/mod_Home.R
+++ b/R/mod_Home.R
@@ -20,17 +20,17 @@ mod_Home_ui <- function(id){
box(
title = "Breeding Insight Genomics App", status = "info", solidHeader = FALSE, width = 12, collapsible = FALSE,
HTML(
- "The app is under development
-
Breeding Insight provides bioinformatic processing support for our external collaborators. This R shiny app provides a web-based user friendly way for users to analyze genomic data without needing to use command-line tools.
+ "The BIGapp is a user-friendly tool for processing low to mid-density genotyping data for diploid and polyploid species. This R shiny app provides a web-based user friendly way for users to analyze genomic data without needing to use command-line tools.
+ Additional analysis will be added, with the initial focus on a core set of features for supporting breeding decisions.
Supported Analyses
Initial supported analyses includes the mature genomics/bioinformatics pipelines developed within Breeding Insight:
- - Genotype processing
- - Summary metrics
+ - Genotype Processing
+ - Summary Metrics
- Population Structure
- GWAS
- - GS
+ - Genomic Selection
"
),
style = "overflow-y: auto; height: 500px"
diff --git a/R/mod_PCA.R b/R/mod_PCA.R
index 9c871ba..229e240 100644
--- a/R/mod_PCA.R
+++ b/R/mod_PCA.R
@@ -16,92 +16,104 @@
mod_PCA_ui <- function(id){
ns <- NS(id)
tagList(
+ fluidPage(
# Add PCA content here
- fluidRow(
- useShinyjs(),
- inlineCSS(list(.borderred = "border-color: red", .redback = "background: red")),
-
- column(width = 3,
- box(
- title = "Inputs", width = 12, solidHeader = TRUE, status = "info",
- p("* Required"),
- fileInput(ns("dosage_file"), "Choose VCF File*", accept = c(".csv",".vcf",".gz")),
- fileInput(ns("passport_file"), "Choose Passport File (Sample IDs in first column)", accept = c(".csv")),
- #Dropdown will update after passport upload
- numericInput(ns("pca_ploidy"), "Species Ploidy*", min = 2, value = NULL),
- actionButton(ns("pca_start"), "Run Analysis"),
- div(style="display:inline-block; float:right",
- dropdownButton(
- tags$h3("PCA Inputs"),
- "You can download examples of the expected files here: \n",
- downloadButton(ns('download_vcf'), "Download VCF Example File"),
- downloadButton(ns('download_pheno'), "Download Passport Example File"),
- actionButton(ns("pca_summary"), "Summary"),
- circle = FALSE,
- status = "warning",
- icon = icon("info"), width = "300px",
- tooltip = tooltipOptions(title = "Click to see info!")
- )),
- style = "overflow-y: auto; height: 480px"
- ),
- box(
- title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE,collapsed = FALSE, width = 12,
- "Change the PCA plot parameters", br(),
- selectInput(ns('group_info'), label = 'Color Variable (eg, Taxon)', choices = NULL),
- materialSwitch(ns('use_cat'), label = "Color Specific Category Within Variable?", status = "success"),
- conditionalPanel(condition = "input.use_cat",
- ns = ns,
- virtualSelectInput(
- inputId = ns("cat_color"),
- label = "Select Category To Color:",
- choices = NULL,
- showValueAsTags = TRUE,
- search = TRUE,
- multiple = TRUE
- ),
- selectInput(ns("grey_choice"), "Select Grey", choices = c("Light Grey", "Grey", "Dark Grey", "Black"), selected = "Grey")
+ fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
+ useShinyjs(),
+ inlineCSS(list(.borderred = "border-color: red", .redback = "background: red")),
+
+ column(width = 3,
+ box(
+ title = "Inputs", width = 12, solidHeader = TRUE, status = "info",
+ p("* Required"),
+ fileInput(ns("dosage_file"), "Choose VCF File*", accept = c(".csv",".vcf",".gz")),
+ fileInput(ns("passport_file"), "Choose Trait File (IDs in first column)", accept = c(".csv")),
+ #Dropdown will update after passport upload
+ numericInput(ns("pca_ploidy"), "Species Ploidy*", min = 2, value = NULL),
+ br(),
+ actionButton(ns("pca_start"), "Run Analysis"),
+ div(style="display:inline-block; float:right",dropdownButton(
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(downloadButton(ns('download_pheno'),""), "Trait Example File"), hr(),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("pca_summary"), "Summary"),
+ circle = FALSE,
+ status = "warning",
+ icon = icon("info"), width = "300px",
+ tooltip = tooltipOptions(title = "Click to see info!")
+ ))#,
+ #style = "overflow-y: auto; height: 480px"
),
- selectInput(ns("color_choice"), "Color Palette", choices = list("Standard Palettes" = c("Set1","Set3","Pastel2",
- "Pastel1","Accent","Spectral",
- "RdYlGn","RdGy"),
- "Colorblind Friendly" = c("Set2","Paired","Dark2","YlOrRd","YlOrBr","YlGnBu","YlGn",
- "Reds","RdPu","Purples","PuRd","PuBuGn","PuBu",
- "OrRd","Oranges","Greys","Greens","GnBu","BuPu",
- "BuGn","Blues","RdYlBu",
- "RdBu", "PuOr","PRGn","PiYG","BrBG"
- )),
- selected = "Set1"),
- selectInput(ns("pc_X"), "X-Axis (2D-Plot only)", choices = c("PC1","PC2","PC3","PC4","PC5"), selected = "PC1"),
- selectInput(ns("pc_Y"), "Y-Axis (2D-Plot only)", choices = c("PC1","PC2","PC3","PC4","PC5"), selected = "PC2"),
- div(style="display:inline-block; float:right",dropdownButton(
- tags$h3("Save Image"),
- selectInput(inputId = ns('pca_figure'), label = 'Figure', choices = c("2D Plot", "Scree Plot"), selected = "2D Plot"),
- selectInput(inputId = ns('pca_image_type'), label = 'File', choices = c("jpeg","tiff","png"), selected = "jpeg"),
- sliderInput(inputId = ns('pca_image_res'), label = 'Resolution', value = 300, min = 50, max = 1000, step=50),
- sliderInput(inputId = ns('pca_image_width'), label = 'Width', value = 10, min = 1, max = 20, step=0.5),
- sliderInput(inputId = ns('pca_image_height'), label = 'Height', value = 6, min = 1, max = 20, step = 0.5),
- downloadButton(ns("download_pca"), "Save Image"),
- circle = FALSE,
- status = "danger",
- icon = icon("floppy-disk"), width = "300px",
- tooltip = tooltipOptions(title = "Click to see inputs!")
- ))
- )
- ),
- column(width = 8,
- box(title = "Passport Data", width = 12, solidHeader = TRUE, collapsible = TRUE, status = "info", collapsed = FALSE,
- DTOutput(ns('passport_table')),
- style = "overflow-y: auto; height: 480px"
- ),
- box(
- title = "PCA Plots", status = "info", solidHeader = FALSE, width = 12, height = 550, maximizable = T,
- bs4Dash::tabsetPanel(
- tabPanel("3D-Plot",withSpinner(plotlyOutput(ns("pca_plot"), height = '460px'))),
- tabPanel("2D-Plot", withSpinner(plotOutput(ns("pca_plot_ggplot"), height = '460px'))),
- tabPanel("Scree Plot", withSpinner(plotOutput(ns("scree_plot"), height = '460px')))) # Placeholder for plot outputs
- )
- ),
- column(width = 1)
+ box(
+ title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE,collapsed = FALSE, width = 12,
+ selectInput(ns('group_info'), label = 'Color Variable', choices = NULL),
+ materialSwitch(ns('use_cat'), label = "Color Category", status = "success"),
+ conditionalPanel(condition = "input.use_cat",
+ ns = ns,
+ virtualSelectInput(
+ inputId = ns("cat_color"),
+ label = "Select Category To Color:",
+ choices = NULL,
+ showValueAsTags = TRUE,
+ search = TRUE,
+ multiple = TRUE
+ ),
+ selectInput(ns("grey_choice"), "Select Grey", choices = c("Light Grey", "Grey", "Dark Grey", "Black"), selected = "Grey")
+ ),
+ selectInput(ns("color_choice"), "Color Palette", choices = list("Standard Palettes" = c("Set1","Set3","Pastel2",
+ "Pastel1","Accent","Spectral",
+ "RdYlGn","RdGy"),
+ "Colorblind Friendly" = c("Set2","Paired","Dark2","YlOrRd","YlOrBr","YlGnBu","YlGn",
+ "Reds","RdPu","Purples","PuRd","PuBuGn","PuBu",
+ "OrRd","Oranges","Greys","Greens","GnBu","BuPu",
+ "BuGn","Blues","RdYlBu",
+ "RdBu", "PuOr","PRGn","PiYG","BrBG"
+ )),
+ selected = "Set1"),
+ selectInput(ns("pc_X"), "X-Axis (2D-Plot only)", choices = c("PC1","PC2","PC3","PC4","PC5"), selected = "PC1"),
+ selectInput(ns("pc_Y"), "Y-Axis (2D-Plot only)", choices = c("PC1","PC2","PC3","PC4","PC5"), selected = "PC2"),
+ div(style="display:inline-block; float:right",dropdownButton(
+ tags$h3("Save Image"),
+ selectInput(inputId = ns('pca_figure'), label = 'Figure', choices = c("2D Plot", "Scree Plot"), selected = "2D Plot"),
+ selectInput(inputId = ns('pca_image_type'), label = 'File', choices = c("jpeg","tiff","png"), selected = "jpeg"),
+ sliderInput(inputId = ns('pca_image_res'), label = 'Resolution', value = 300, min = 50, max = 1000, step=50),
+ sliderInput(inputId = ns('pca_image_width'), label = 'Width', value = 10, min = 1, max = 20, step=0.5),
+ sliderInput(inputId = ns('pca_image_height'), label = 'Height', value = 6, min = 1, max = 20, step = 0.5),
+ downloadButton(ns("download_pca"), "Save Image"),
+ circle = FALSE,
+ status = "danger",
+ icon = icon("floppy-disk"), width = "300px",
+ tooltip = tooltipOptions(title = "Click to see inputs!")
+ ))
+ )
+ ),
+ column(width = 8,
+ box(title = "Trait Data", width = 12, solidHeader = TRUE, collapsible = TRUE, status = "info", collapsed = FALSE, maximizable = T,
+ DTOutput(ns('passport_table')),
+ style = "overflow-y: auto; height: 480px"
+ ),
+ box(
+ title = "PCA Plots", status = "info", solidHeader = FALSE, width = 12, height = 550, maximizable = T,
+ bs4Dash::tabsetPanel(
+ tabPanel("3D-Plot",withSpinner(plotlyOutput(ns("pca_plot"), height = '460px'))),
+ tabPanel("2D-Plot", withSpinner(plotOutput(ns("pca_plot_ggplot"), height = '460px'))),
+ tabPanel("Scree Plot", withSpinner(plotOutput(ns("scree_plot"), height = '460px')))) # Placeholder for plot outputs
+ )
+ ),
+ column(width = 1)
+ )
)
)
}
@@ -114,12 +126,51 @@ mod_PCA_ui <- function(id){
#' @importFrom plotly layout plotlyOutput renderPlotly add_markers plot_ly
#' @importFrom RColorBrewer brewer.pal
#' @importFrom shinyjs toggleClass
+#' @import shinydisconnect
#'
#' @noRd
mod_PCA_server <- function(input, output, session, parent_session){
ns <- session$ns
-
+ options(warn = -1) #Uncomment to suppress warnings
+
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "PCA_tabset",
+ selected = "PCA_par")
+ # expand specific box
+ updateBox(id = "PCA_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "PCA_tabset",
+ selected = "PCA_results")
+ # expand specific box
+ updateBox(id = "PCA_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "PCA_tabset",
+ selected = "PCA_cite")
+ # expand specific box
+ updateBox(id = "PCA_box", action = "toggle", session = parent_session)
+ })
+
#PCA reactive values
pca_data <- reactiveValues(
pc_df_pop = NULL,
@@ -198,7 +249,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
} else{
#Import genotype information if in VCF format
- vcf <- read.vcfR(geno)
+ vcf <- read.vcfR(geno, verbose = FALSE)
#Get items in FORMAT column
info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT
@@ -252,6 +303,25 @@ mod_PCA_server <- function(input, output, session, parent_session){
# Print the modified dataframe
row.names(info_df) <- info_df[,1]
+ # Check ploidy
+ if(input$pca_ploidy != max(genomat, na.rm = T)){
+ shinyalert(
+ title = "Wrong ploidy",
+ text = "The input ploidy does not match the maximum dosage found in the genotype file",
+ size = "s",
+ closeOnEsc = TRUE,
+ closeOnClickOutside = FALSE,
+ html = TRUE,
+ type = "error",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192",
+ showCancelButton = FALSE,
+ animation = TRUE
+ )
+ req(input$pca_ploidy == max(genomat, na.rm = T))
+ }
+
#Plotting
#First build a relationship matrix using the genotype values
G.mat.updog <- Gmatrix(t(genomat), method = "VanRaden", ploidy = as.numeric(ploidy), missingValue = "NA")
@@ -312,14 +382,14 @@ mod_PCA_server <- function(input, output, session, parent_session){
#End of PCA section
})
-
+
##2D PCA plotting
pca_2d <- reactive({
validate(
need(!is.null(pca_data$pc_df_pop), "Input Genotype file, Species ploidy, and run the analysis to access results in this section.")
)
-
+
# Generate colors
if (!is.null(pca_data$my_palette)) {
@@ -396,7 +466,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
tit = paste0('Total Explained Variance =', sum(pca_data$variance_explained[1:3]))
- fig <- plot_ly(pca_data$pc_df_pop, x = ~PC1, y = ~PC2, z = ~PC3, color = pca_data$pc_df_pop[[input$group_info]],
+ fig <- plot_ly(pca_data$pc_df_pop, x = ~PC1, y = ~PC2, z = ~PC3, color = as.factor(pca_data$pc_df_pop[[input$group_info]]),
colors = my_palette) %>%
add_markers(size = 12, text = paste0("Sample:",pca_data$pc_df_pop$Row.names))
@@ -448,14 +518,14 @@ mod_PCA_server <- function(input, output, session, parent_session){
output$scree_plot <- renderPlot({
pca_scree()
})
-
+
##Summary Info
pca_summary_info <- function() {
# Handle possible NULL values for inputs
dosage_file_name <- if (!is.null(input$dosage_file$name)) input$dosage_file$name else "No file selected"
passport_file_name <- if (!is.null(input$passport_file$name)) input$passport_file$name else "No file selected"
selected_ploidy <- if (!is.null(input$pca_ploidy)) as.character(input$pca_ploidy) else "Not selected"
-
+
# Print the summary information
cat(
"BIGapp PCA Summary\n",
@@ -483,7 +553,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
sep = ""
)
}
-
+
# Popup for analysis summary
observeEvent(input$pca_summary, {
showModal(modalDialog(
@@ -499,8 +569,8 @@ mod_PCA_server <- function(input, output, session, parent_session){
)
))
})
-
-
+
+
# Download Summary Info
output$download_pca_info <- downloadHandler(
filename = function() {
@@ -511,7 +581,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
writeLines(paste(capture.output(pca_summary_info()), collapse = "\n"), file)
}
)
-
+
#Download figures for PCA
output$download_pca <- downloadHandler(
diff --git a/R/mod_dapc.R b/R/mod_dapc.R
index 02002bc..f221261 100644
--- a/R/mod_dapc.R
+++ b/R/mod_dapc.R
@@ -8,11 +8,21 @@
#' @noRd
#'
#' @importFrom shiny NS tagList
+#' @import shinydisconnect
mod_dapc_ui <- function(id){
ns <- NS(id)
tagList(
# Add PCA content here
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
bs4Dash::box(
title = "Inputs", width = 12, solidHeader = TRUE, status = "info",
@@ -23,10 +33,13 @@ mod_dapc_ui <- function(id){
numericInput(ns("dapc_ploidy"), "Species Ploidy", min = 1, value = NULL),
actionButton(ns("K_start"), "Run Step 1"),
div(style="display:inline-block; float:right",dropdownButton(
- tags$h3("DAPC Inputs"),
- "You can download an examples of the expected input file here: \n",
- downloadButton(ns('download_vcf'), "Download VCF Example File"),
- #"DAPC Input file and analysis info. The DAPC analysis is broken down into two steps. The first step (Step 1), uses Kmeans clustering to estimate the most likely number of clusters within the dataset. This is visualized in the BIC plot and is typically the minimum BIC value. Step 2 is the DAPC analysis where the most likely value for K (number of clusters) is input and the cluster memberships are determined in the DAPC results",
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(downloadButton(ns('download_pheno'),""), "Trait Example File"), hr(),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("dapc_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
@@ -83,13 +96,13 @@ mod_dapc_ui <- function(id){
)
),
column(width = 8,
- bs4Dash::box(title = "DAPC Data", width = 12, solidHeader = TRUE, collapsible = TRUE, status = "info", collapsed = FALSE,
+ bs4Dash::box(title = "DAPC Data", width = 12, solidHeader = TRUE, collapsible = TRUE, status = "info", collapsed = FALSE, maximizable = T,
bs4Dash::tabsetPanel(
tabPanel("BIC Values",DTOutput(ns('BIC_table'))),
tabPanel("DAPC Values", DTOutput(ns('DAPC_table'))), # Placeholder for plot outputs
br(), br()
)),
- bs4Dash::box(title = "DAPC Plots", status = "info", solidHeader = FALSE, width = 12, height = 550,
+ bs4Dash::box(title = "DAPC Plots", status = "info", solidHeader = FALSE, width = 12, height = 550, maximizable = T,
bs4Dash::tabsetPanel(
tabPanel("BIC Plot",withSpinner(plotOutput(ns("BIC_plot"), height = '460px'))),
tabPanel("DAPC Plot", withSpinner(plotOutput(ns("DAPC_plot"), height = '460px'))),
@@ -112,7 +125,43 @@ mod_dapc_ui <- function(id){
mod_dapc_server <- function(input, output, session, parent_session){
ns <- session$ns
-
+
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "DAPC_tabset",
+ selected = "DAPC_par")
+ # expand specific box
+ updateBox(id = "DAPC_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "DAPC_tabset",
+ selected = "DAPC_results")
+ # expand specific box
+ updateBox(id = "DAPC_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "DAPC_tabset",
+ selected = "DAPC_cite")
+ # expand specific box
+ updateBox(id = "DAPC_box", action = "toggle", session = parent_session)
+ })
dapc_items <- reactiveValues(
grp = NULL,
@@ -144,6 +193,9 @@ mod_dapc_server <- function(input, output, session, parent_session){
)
}
req(input$dosage_file$datapath, input$dapc_ploidy)
+
+ #PB
+ updateProgressBar(session = session, id = "pb_dapc", value = 5, title = "Importing input files")
ploidy <- as.numeric(input$dapc_ploidy)
maxK <- as.numeric(input$dapc_kmax)
@@ -171,7 +223,10 @@ mod_dapc_server <- function(input, output, session, parent_session){
genotypeMatrix <- apply(genotypeMatrix, 2, convert_to_dosage)
rm(vcf) #Remove VCF
}
-
+
+ #PB
+ updateProgressBar(session = session, id = "pb_dapc", value = 30, title = "Calculating K")
+
#Perform analysis
get_k <- findK(genotypeMatrix, maxK, ploidy)
@@ -179,6 +234,10 @@ mod_dapc_server <- function(input, output, session, parent_session){
dapc_items$grp <- get_k$grp
dapc_items$bestK <- get_k$bestK
dapc_items$BIC <- get_k$BIC
+
+ #PB
+ updateProgressBar(session = session, id = "pb_dapc", value = 100, title = "Step 1: Finished!")
+
})
observeEvent(input$dapc_start, {
@@ -203,7 +262,10 @@ mod_dapc_server <- function(input, output, session, parent_session){
)
}
req(input$dosage_file$datapath, input$dapc_ploidy, input$dapc_k)
-
+
+ #Pb
+ updateProgressBar(session = session, id = "pb_dapc", value = 5, title = "Importing input files")
+
geno <- input$dosage_file$datapath
ploidy <- as.numeric(input$dapc_ploidy)
selected_K <- as.numeric(input$dapc_k)
@@ -216,7 +278,10 @@ mod_dapc_server <- function(input, output, session, parent_session){
# Apply the function to the first INFO string
info_ids <- extract_info_ids(info[1])
-
+
+ #Pb
+ updateProgressBar(session = session, id = "pb_dapc", value = 30, title = "Formatting files")
+
#Get the genotype values if the updog dosage calls are present
if ("UD" %in% info_ids) {
genotypeMatrix <- extract.gt(vcf, element = "UD")
@@ -228,13 +293,20 @@ mod_dapc_server <- function(input, output, session, parent_session){
genotypeMatrix <- apply(genotypeMatrix, 2, convert_to_dosage)
rm(vcf) #Remove VCF
}
-
+
+ #Pb
+ updateProgressBar(session = session, id = "pb_dapc", value = 60, title = "Performing DAPC")
+
#Perform analysis
clusters <- performDAPC(genotypeMatrix, selected_K, ploidy)
#Assign results to reactive value
dapc_items$assignments <- clusters$Q
dapc_items$dapc <- clusters$dapc
+
+ #Pb
+ updateProgressBar(session = session, id = "pb_dapc", value = 100, title = "Finished!")
+
})
###Outputs from DAPC
@@ -425,6 +497,70 @@ mod_dapc_server <- function(input, output, session, parent_session){
ex <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ dapc_summary_info <- function() {
+ #Handle possible NULL values for inputs
+ dosage_file_name <- if (!is.null(input$dosage_file$name)) input$dosage_file$name else "No file selected"
+ #passport_file_name <- if (!is.null(input$passport_file$name)) input$passport_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$dapc_ploidy)) as.character(input$dapc_ploidy) else "Not selected"
+
+ #Print the summary information
+ cat(
+ "BIGapp DAPC Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", dosage_file_name), "\n",
+ #paste("Input Passport File:", passport_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("Maximum K:", input$dapc_kmax), "\n",
+ paste("Number of Clusters (K):", input$dapc_k), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("adegenet:", packageVersion("adegenet")), "\n",
+ paste("ggplot2:", packageVersion("ggplot2")), "\n",
+ paste("vcfR:", packageVersion("vcfR")), "\n",
+ paste("RColorBrewer:", packageVersion("RColorBrewer")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$dapc_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_dapc_info", "Download")
+ ),
+ pre(
+ paste(capture.output(dapc_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_dapc_info <- downloadHandler(
+ filename = function() {
+ paste("dapc_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(dapc_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_diversity.R b/R/mod_diversity.R
index 8829de5..90295af 100644
--- a/R/mod_diversity.R
+++ b/R/mod_diversity.R
@@ -7,20 +7,33 @@
#' @noRd
#'
#' @importFrom shiny NS tagList
+#' @import shinydisconnect
mod_diversity_ui <- function(id){
ns <- NS(id)
tagList(
# Add GWAS content here
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE,
fileInput(ns("diversity_file"), "Choose VCF File", accept = c(".csv",".vcf",".gz")),
numericInput(ns("diversity_ploidy"), "Species Ploidy", min = 1, value = NULL),
actionButton(ns("diversity_start"), "Run Analysis"),
div(style="display:inline-block; float:right",dropdownButton(
- tags$h3("Diversity Parameters"),
- "You can download an examples of the expected input file here: \n",
- downloadButton(ns('download_vcf'), "Download VCF Example File"),
+ HTML("Input files"),
+ p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("diversity_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
@@ -51,7 +64,7 @@ mod_diversity_ui <- function(id){
),
column(width = 6,
box(
- title = "Plots", status = "info", solidHeader = FALSE, width = 12, height = 550,
+ title = "Plots", status = "info", solidHeader = FALSE, width = 12, height = 550, maximizable = T,
bs4Dash::tabsetPanel(
tabPanel("Dosage Plot", plotOutput(ns('dosage_plot')),style = "overflow-y: auto; height: 500px"),
tabPanel("MAF Plot", plotOutput(ns('maf_plot')),style = "overflow-y: auto; height: 500px"),
@@ -83,6 +96,44 @@ mod_diversity_ui <- function(id){
mod_diversity_server <- function(input, output, session, parent_session){
ns <- session$ns
+
+ # Help links
+ observeEvent(input$goPar, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Diversity_tabset",
+ selected = "Genomic_Diversity_par")
+ # expand specific box
+ updateBox(id = "Genomic_Diversity_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goRes, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Diversity_tabset",
+ selected = "Genomic_Diversity_results")
+ # expand specific box
+ updateBox(id = "Genomic_Diversity_box", action = "toggle", session = parent_session)
+ })
+
+ observeEvent(input$goCite, {
+ # change to help tab
+ updatebs4TabItems(session = parent_session, inputId = "MainMenu",
+ selected = "help")
+
+ # select specific tab
+ updateTabsetPanel(session = parent_session, inputId = "Genomic_Diversity_tabset",
+ selected = "Genomic_Diversity_cite")
+ # expand specific box
+ updateBox(id = "Genomic_Diversity_box", action = "toggle", session = parent_session)
+ })
+
#######Genomic Diversity analysis
#Genomic Diversity output files
@@ -92,7 +143,8 @@ mod_diversity_server <- function(input, output, session, parent_session){
het_df = NULL,
maf_df = NULL,
pos_df = NULL,
- markerPlot = NULL
+ markerPlot = NULL,
+ snp_stats = NULL
)
#Reactive boxes
@@ -144,7 +196,7 @@ mod_diversity_server <- function(input, output, session, parent_session){
updateProgressBar(session = session, id = "pb_diversity", value = 20, title = "Importing VCF")
#Import genotype information if in VCF format
- vcf <- read.vcfR(geno)
+ vcf <- read.vcfR(geno, verbose = FALSE)
#Save position information
diversity_items$pos_df <- data.frame(vcf@fix[, 1:2])
@@ -163,17 +215,16 @@ mod_diversity_server <- function(input, output, session, parent_session){
geno_mat <- extract.gt(vcf, element = "GT")
geno_mat <- apply(geno_mat, 2, convert_to_dosage)
rm(vcf) #Remove VCF
-
- print(class(geno_mat))
+ #print(class(geno_mat))
#Convert genotypes to alternate counts if they are the reference allele counts
#Importantly, the dosage plot is based on the input format NOT the converted genotypes
- is_reference <- TRUE #(input$zero_value == "Reference Allele Counts")
+ is_reference <- FALSE #(input$zero_value == "Reference Allele Counts")
- print("Genotype file successfully imported")
+ #print("Genotype file successfully imported")
######Get MAF plot (Need to remember that the VCF genotypes are likely set as 0 = homozygous reference, where the dosage report is 0 = homozygous alternate)
- print("Starting percentage calc")
+ #print("Starting percentage calc")
#Status
updateProgressBar(session = session, id = "pb_diversity", value = 70, title = "Calculating...")
# Calculate percentages for both genotype matrices
@@ -182,7 +233,7 @@ mod_diversity_server <- function(input, output, session, parent_session){
percentages1_df <- as.data.frame(t(percentages1))
percentages1_df$Data <- "Dosages"
# Assuming my_data is your dataframe
- print("Percentage Complete: melting dataframe")
+ #print("Percentage Complete: melting dataframe")
melted_data <- percentages1_df %>%
pivot_longer(cols = -(Data),names_to = "Dosage", values_to = "Percentage")
@@ -197,16 +248,46 @@ mod_diversity_server <- function(input, output, session, parent_session){
# Calculating heterozygosity
diversity_items$het_df <- calculate_heterozygosity(geno_mat, ploidy = ploidy)
- print("Heterozygosity success")
+ #print("Heterozygosity success")
diversity_items$maf_df <- calculateMAF(geno_mat, ploidy = ploidy)
diversity_items$maf_df <- diversity_items$maf_df[, c(1,3)]
- print("MAF success")
-
+ #Calculate PIC
+ calc_allele_frequencies <- function(d_diplo_t, ploidy) {
+ allele_frequencies <- apply(d_diplo_t, 1, function(x) {
+ count_sum <- sum(!is.na(x))
+ allele_sum <- sum(x, na.rm = TRUE)
+ if (count_sum != 0) {allele_sum / (ploidy * count_sum)} else {NA}
+ })
+
+ all_allele_frequencies <- data.frame(SNP = rownames(d_diplo_t), p1= allele_frequencies, p2= 1-allele_frequencies)
+ return(all_allele_frequencies)
+ }
+ Fre <-calc_allele_frequencies(geno_mat,as.numeric(ploidy))
+ calc_pic <- function(x) {
+ freq_squared <- x^2
+ outer_matrix <- outer(freq_squared, freq_squared)
+ upper_tri_sum <- sum(outer_matrix[upper.tri(outer_matrix)])
+ pic <- 1 - sum(freq_squared) - 2*upper_tri_sum
+ return(pic)
+ }
+
+ print(Fre[1:5,])
+
+ PIC_results <- apply(Fre[, c("p1", "p2")], 1, calc_pic)
+ PIC_df <- data.frame(SNP_ID = Fre$SNP, PIC = PIC_results)
+ rownames(PIC_df) <- NULL
+
+ print(PIC_df[1:5,])
+ print(diversity_items$maf_df[1:5,])
+
+ diversity_items$snp_stats <- (merge(diversity_items$maf_df, PIC_df, by = "SNP_ID", all = TRUE))[,c("SNP_ID","MAF","PIC")]
+ colnames(diversity_items$snp_stats)[1] <- "SNP"
+
#Updating value boxes
output$mean_het_box <- renderValueBox({
valueBox(
- value = round(mean(diversity_items$het_df$ObservedHeterozygosity),3),
+ value = round(mean(diversity_items$het_df$Ho),3),
subtitle = "Mean Heterozygosity",
icon = icon("dna"),
color = "info"
@@ -248,46 +329,17 @@ mod_diversity_server <- function(input, output, session, parent_session){
box_plot()
})
- #Het plot
- het_plot <- reactive({
+ output$het_plot <- renderPlot({
validate(
need(!is.null(diversity_items$het_df) & !is.null(input$hist_bins), "Input VCF, define parameters and click `run analysis` to access results in this session.")
)
- hist(diversity_items$het_df$ObservedHeterozygosity, breaks = as.numeric(input$hist_bins), col = "tan3", border = "black", xlim= c(0,1),
+ hist(diversity_items$het_df$Ho, breaks = as.numeric(input$hist_bins), col = "tan3", border = "black", xlim= c(0,1),
xlab = "Observed Heterozygosity",
ylab = "Number of Samples",
main = "Sample Observed Heterozygosity")
-
axis(1, at = seq(0, 1, by = 0.1), labels = TRUE)
})
- output$het_plot <- renderPlot({
- het_plot()
- })
-
- #AF Plot
- #af_plot <- reactive({
- # validate(
- # need(!is.null(diversity_items$maf_df) & !is.null(input$hist_bins), "Input VCF, define parameters and click `run analysis` to access results in this session.")
- # )
- # hist(diversity_items$maf_df$AF, breaks = as.numeric(input$hist_bins), col = "grey", border = "black", xlab = "Alternate Allele Frequency",
- # ylab = "Frequency", main = "Alternate Allele Frequency Distribution")
- #})
-
- #output$af_plot <- renderPlot({
- # af_plot()
- #})
-
- #MAF plot
- maf_plot <- reactive({
- validate(
- need(!is.null(diversity_items$maf_df) & !is.null(input$hist_bins), "Input VCF, define parameters and click `run analysis` to access results in this session.")
- )
-
- hist(diversity_items$maf_df$MAF, breaks = as.numeric(input$hist_bins), col = "grey", border = "black", xlab = "Minor Allele Frequency (MAF)",
- ylab = "Frequency", main = "Minor Allele Frequency Distribution")
- })
-
#Marker plot
marker_plot <- reactive({
validate(
@@ -297,8 +349,8 @@ mod_diversity_server <- function(input, output, session, parent_session){
diversity_items$pos_df$POS <- as.numeric(diversity_items$pos_df$POS)
# Sort the dataframe and pad with a 0 if only a single digit is provided
diversity_items$pos_df$CHROM <- ifelse(
- nchar(diversity_items$pos_df$CHROM) == 1,
- paste0("0", diversity_items$pos_df$CHROM),
+ nchar(diversity_items$pos_df$CHROM) == 1,
+ paste0("0", diversity_items$pos_df$CHROM),
diversity_items$pos_df$CHROM
)
diversity_items$pos_df <- diversity_items$pos_df[order(diversity_items$pos_df$CHROM), ]
@@ -346,7 +398,12 @@ mod_diversity_server <- function(input, output, session, parent_session){
})
output$maf_plot <- renderPlot({
- maf_plot()
+ validate(
+ need(!is.null(diversity_items$maf_df) & !is.null(input$hist_bins), "Input VCF, define parameters and click `run analysis` to access results in this session.")
+ )
+
+ hist(diversity_items$maf_df$MAF, breaks = as.numeric(input$hist_bins), col = "grey", border = "black", xlab = "Minor Allele Frequency (MAF)",
+ ylab = "Frequency", main = "Minor Allele Frequency Distribution")
})
sample_table <- reactive({
@@ -360,9 +417,9 @@ mod_diversity_server <- function(input, output, session, parent_session){
snp_table <- reactive({
validate(
- need(!is.null(diversity_items$maf_df), "Input VCF, define parameters and click `run analysis` to access results in this session.")
+ need(!is.null(diversity_items$snp_stats), "Input VCF, define parameters and click `run analysis` to access results in this session.")
)
- diversity_items$maf_df
+ diversity_items$snp_stats
})
output$snp_table <- renderDT({snp_table()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
@@ -393,12 +450,15 @@ mod_diversity_server <- function(input, output, session, parent_session){
# Conditional plotting based on input selection
if (input$div_figure == "Dosage Plot") {
print(box_plot())
- } else if (input$div_figure == "AF Histogram") {
- af_plot()
} else if (input$div_figure == "MAF Histogram") {
- maf_plot()
+ hist(diversity_items$maf_df$MAF, breaks = as.numeric(input$hist_bins), col = "grey", border = "black", xlab = "Minor Allele Frequency (MAF)",
+ ylab = "Frequency", main = "Minor Allele Frequency Distribution")
} else if (input$div_figure == "OHet Histogram") {
- het_plot()
+ hist(diversity_items$het_df$Ho, breaks = as.numeric(input$hist_bins), col = "tan3", border = "black", xlim= c(0,1),
+ xlab = "Observed Heterozygosity",
+ ylab = "Number of Samples",
+ main = "Sample Observed Heterozygosity")
+ axis(1, at = seq(0, 1, by = 0.1), labels = TRUE)
} else if (input$div_figure == "Marker Plot") {
print(marker_plot())
}
@@ -425,10 +485,10 @@ mod_diversity_server <- function(input, output, session, parent_session){
temp_files <- c(temp_files, het_file)
}
- if (!is.null(diversity_items$maf_df)) {
+ if (!is.null(diversity_items$snp_stats)) {
# Create a temporary file for BIC data frame
maf_file <- file.path(temp_dir, paste0("SNP-statistics-", Sys.Date(), ".csv"))
- write.csv(diversity_items$maf_df, maf_file, row.names = FALSE)
+ write.csv(diversity_items$snp_stats, maf_file, row.names = FALSE)
temp_files <- c(temp_files, maf_file)
}
@@ -450,10 +510,69 @@ mod_diversity_server <- function(input, output, session, parent_session){
ex <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ diversity_summary_info <- function() {
+ # Handle possible NULL values for inputs
+ dosage_file_name <- if (!is.null(input$diversity_file$name)) input$diversity_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$diversity_ploidy)) as.character(input$diversity_ploidy) else "Not selected"
+
+ # Print the summary information
+ cat(
+ "BIGapp Summary Metrics Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste(R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", dosage_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("BIGr:", packageVersion("BIGr")), "\n",
+ paste("ggplot2:", packageVersion("ggplot2")), "\n",
+ paste("vcfR:", packageVersion("vcfR")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$diversity_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_diversity_info", "Download")
+ ),
+ pre(
+ paste(capture.output(diversity_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_diversity_info <- downloadHandler(
+ filename = function() {
+ paste("diversity_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(diversity_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
# mod_diversity_ui("diversity_1")
## To be copied in the server
-# mod_diversity_server("diversity_1")
\ No newline at end of file
+# mod_diversity_server("diversity_1")
diff --git a/R/mod_dosage2vcf.R b/R/mod_dosage2vcf.R
index 645c7f8..2c8c879 100644
--- a/R/mod_dosage2vcf.R
+++ b/R/mod_dosage2vcf.R
@@ -8,43 +8,67 @@
#'
#' @importFrom shiny NS tagList
#' @importFrom shinyjs enable disable useShinyjs
+#' @import shinydisconnect
#'
mod_dosage2vcf_ui <- function(id){
ns <- NS(id)
tagList(
fluidPage(
- fluidRow(
- box(
- title = "Inputs", status = "info", solidHeader = TRUE, collapsible = FALSE, collapsed = FALSE,
- fileInput(ns("report_file"), "Choose DArT Dose Report File", accept = c(".csv")),
- fileInput(ns("counts_file"), "Choose DArT Counts File", accept = c(".csv")),
- textInput(ns("d2v_output_name"), "Output File Name"),
- numericInput(ns("dosage2vcf_ploidy"), "Species Ploidy", min = 1, value = NULL),
- actionButton(ns("run_analysis"), "Run Analysis"),
- useShinyjs(),
- downloadButton(ns('download_d2vcf'), "Download VCF File", class = "butt"),
- div(style="display:inline-block; float:right",dropdownButton(
- HTML("Input files"),
- p(downloadButton(ns('download_dose'), ""), "Dose Report Example File"),
- p(downloadButton(ns('download_counts'), ""), "Counts Example File"), hr(),
- p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- p(HTML("Graphics description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- circle = FALSE,
- status = "warning",
- icon = icon("info"), width = "500px",
- tooltip = tooltipOptions(title = "Click to see info!")
- ))
- ),
- valueBoxOutput(ns("ReportSnps"))
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
),
fluidRow(
- box(title = "Status", width = 3, collapsible = TRUE, status = "info",
+ column(width = 5,
+ box(
+ title = "Inputs", width=12, status = "info", solidHeader = TRUE, collapsible = FALSE, collapsed = FALSE,
+ selectInput(ns('file_type'), label = 'Select File Format', choices = c("DArT Dosage Reports","AgriSeq")),
+ conditionalPanel(condition = "input.file_type == 'DArT Dosage Reports'",
+ ns = ns,
+ fileInput(ns("report_file"), "Choose DArT Dose Report File", accept = c(".csv")),
+ fileInput(ns("counts_file"), "Choose DArT Counts File", accept = c(".csv")),
+ ),
+ conditionalPanel(condition = "input.file_type == 'AgriSeq'",
+ ns = ns,
+ "Support for this file type is in-progress",
+ "",
+ #fileInput(ns("agriseq_file"), "Choose Input File", accept = c(".csv"))
+ ),
+ textInput(ns("d2v_output_name"), "Output File Name"),
+ numericInput(ns("dosage2vcf_ploidy"), "Species Ploidy", min = 1, value = NULL),
+ #actionButton(ns("run_analysis"), "Run Analysis"),
+ useShinyjs(),
+ downloadButton(ns('download_d2vcf'), "Download VCF File", class = "butt"),
+ div(style="display:inline-block; float:right",dropdownButton(
+ HTML("Input files"),
+ p(downloadButton(ns('download_dose'), ""), "Dose Report Example File"),
+ p(downloadButton(ns('download_counts'), ""), "Counts Example File"), hr(),
+ p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Graphics description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ actionButton(ns("d2vcf_summary"), "Summary"),
+ circle = FALSE,
+ status = "warning",
+ icon = icon("info"), width = "500px",
+ tooltip = tooltipOptions(title = "Click to see info!")
+ ))
+ )
+ ),
+ column(width = 4,
+ valueBoxOutput(ns("ReportSnps"), width=12),
+ box(title = "Status", width = 12, collapsible = TRUE, status = "info",
progressBar(id = ns("dosage2vcf_pb"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
+ )
+ ),
+ column(width = 1),
)
)
)
- )
}
#' dosage2vcf Server Functions
@@ -162,7 +186,8 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
##This is for the DArT files conversion to VCF
output$download_d2vcf <- downloadHandler(
filename = function() {
- paste0(input$d2v_output_name, ".vcf.gz")
+ output_name <- gsub("\\.vcf$", "", input$d2v_output_name)
+ paste0(output_name, ".vcf.gz")
},
content = function(file) {
# Ensure the files are uploaded
@@ -226,6 +251,65 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete! - Downloading VCF")
}
)
+
+ ##Summary Info
+ d2vcf_summary_info <- function() {
+ #Handle possible NULL values for inputs
+ report_file_name <- if (!is.null(input$report_file$name)) input$report_file$name else "No file selected"
+ counts_file_name <- if (!is.null(input$counts_file$name)) input$counts_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$dosage2vcf_ploidy)) as.character(input$dosage2vcf_ploidy) else "Not selected"
+
+ #Print the summary information
+ cat(
+ "BIGapp Dosage2VCF Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Dosage Report File:", report_file_name), "\n",
+ paste("Input Counts File:", counts_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("BIGr:", packageVersion("BIGr")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$d2vcf_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_d2vcf_info", "Download")
+ ),
+ pre(
+ paste(capture.output(d2vcf_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_d2vcf_info <- downloadHandler(
+ filename = function() {
+ paste("Dosage2VCF_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(d2vcf_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_gwas.R b/R/mod_gwas.R
index a5b6282..999a634 100644
--- a/R/mod_gwas.R
+++ b/R/mod_gwas.R
@@ -11,19 +11,30 @@
#' @importFrom future availableCores
#' @importFrom shinycssloaders withSpinner
#' @importFrom shinyWidgets virtualSelectInput
+#' @import shinydisconnect
#'
mod_gwas_ui <- function(id){
ns <- NS(id)
tagList(
# Add GWAS content here
fluidRow(
+ disconnectMessage(
+ text = "An unexpected error occurred, please reload the application and check the input file(s).",
+ refresh = "Reload now",
+ background = "white",
+ colour = "grey",
+ overlayColour = "grey",
+ overlayOpacity = 0.3,
+ refreshColour = "purple"
+ ),
column(width = 3,
box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE,
fileInput(ns("gwas_file"), "Choose VCF File", accept = c(".csv",".vcf",".gz")),
- fileInput(ns("phenotype_file"), "Choose Passport File", accept = ".csv"),
+ fileInput(ns("phenotype_file"), "Choose Trait File", accept = ".csv"),
numericInput(ns("gwas_ploidy"), "Species Ploidy", min = 1, value = NULL),
- selectInput(ns('gwas_threshold'), label='Significance Threshold Method', choices = c("M.eff","Bonferroni","FDR","permute"), selected="M.eff"),
- selectInput(ns('trait_info'), label = 'Select Trait (eg. Color):', choices = NULL),
+ numericInput(ns("bp_window_before"), "Base pair window (Mb)", min = 0, value = 2),
+ selectInput(ns('gwas_threshold'), label='Significance Threshold', choices = c("M.eff","Bonferroni","FDR","permute"), selected="M.eff"),
+ selectInput(ns('trait_info'), label = 'Select Trait', choices = NULL),
virtualSelectInput(
inputId = ns("fixed_info"),
label = "Select Fixed Effects (optional):",
@@ -37,11 +48,12 @@ mod_gwas_ui <- function(id){
div(style="display:inline-block; float:right",dropdownButton(
HTML("Input files"),
p(downloadButton(ns('download_vcf'),""), "VCF Example File"),
- p(downloadButton(ns('download_pheno'),""), "Passport Example File"), hr(),
+ p(downloadButton(ns('download_pheno'),""), "Trait Example File"), hr(),
p(HTML("Parameters description:"), actionButton(ns("goGWASpar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- p(HTML("Graphics description:"), actionButton(ns("goGWASgraph"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
+ p(HTML("Results description:"), actionButton(ns("goGWASgraph"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
p(HTML("How to cite:"), actionButton(ns("goGWAScite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
- p(HTML("GWASpoly tutorial:"), actionButton(ns("goGWASpoly"), icon("arrow-up-right-from-square", verify_fa = FALSE), onclick ="window.open('https://jendelman.github.io/GWASpoly/GWASpoly.html', '_blank')" )),
+ p(HTML("GWASpoly tutorial:"), actionButton(ns("goGWASpoly"), icon("arrow-up-right-from-square", verify_fa = FALSE), onclick ="window.open('https://jendelman.github.io/GWASpoly/GWASpoly.html', '_blank')" )),hr(),
+ actionButton(ns("gwas_summary"), "Summary"),
circle = FALSE,
status = "warning",
icon = icon("info"), width = "300px",
@@ -51,14 +63,46 @@ mod_gwas_ui <- function(id){
),
column(width = 6,
box(
- title = "Plots", status = "info", solidHeader = FALSE, width = 12, height = 600,
+ title = "Plots", status = "info", solidHeader = FALSE, width = 12,
bs4Dash::tabsetPanel(
tabPanel("BIC Plot", withSpinner(plotOutput(ns("bic_plot"), height = "500px"))),
tabPanel("Manhattan Plot", withSpinner(plotOutput(ns("manhattan_plot"), height = "500px"))),
tabPanel("QQ Plot", withSpinner(plotOutput(ns("qq_plot"), height = "500px"))),
tabPanel("BIC Table", withSpinner(DTOutput(ns("bic_table"))),style = "overflow-y: auto; height: 500px"),
- tabPanel("QTL - significant markers",
- withSpinner(DTOutput(ns('gwas_stats'))),style = "overflow-y: auto; height: 500px")
+ tabPanel("QTL - significant markers", withSpinner(DTOutput(ns("all_qtl"))),style = "overflow-y: auto; height: 500px"),
+ tabPanel("Filter QTL by LD window",
+ br(),
+ box(
+ title = "LD plot",solidHeader = FALSE, width = 12,
+ plotlyOutput(ns("LD_plot"), height = "500px"), br(),
+ sliderInput(ns("bp_window_after"), label = "Adjust base pair window here to filter QTLs", min = 0,
+ max = 100, value = 5, step = 1)
+ ),
+ box(
+ title = "Filtered QTL", solidHeader = FALSE, width = 12,
+ withSpinner(DTOutput(ns('gwas_stats')))
+ )
+ ),
+ tabPanel("Multiple QTL model",
+ br(),
+ pickerInput(
+ inputId = ns("sele_models"),
+ label = "Select model",
+ choices = "will be updated",
+ options = list(
+ `actions-box` = TRUE),
+ multiple = FALSE
+ ), hr(),
+ pickerInput(
+ inputId = ns("sele_qtl"),
+ label = "Select QTL",
+ choices = "will be updated",
+ options = list(
+ `actions-box` = TRUE),
+ multiple = TRUE
+ ), hr(),
+ withSpinner(DTOutput(ns('gwas_fitqtl'))),
+ style = "overflow-y: auto; height: 500px")
)
)
),
@@ -73,7 +117,8 @@ mod_gwas_ui <- function(id){
tags$h3("Save Image"),
selectInput(inputId = ns('gwas_figures'), label = 'Figure', choices = c("BIC Plot",
"Manhattan Plot",
- "QQ Plot")),
+ "QQ Plot",
+ "LD Plot")),
selectInput(inputId = ns('gwas_image_type'), label = 'File Type', choices = c("jpeg","tiff","png"), selected = "jpeg"),
sliderInput(inputId = ns('gwas_image_res'), label = 'Resolution', value = 300, min = 50, max = 1000, step=50),
sliderInput(inputId = ns('gwas_image_width'), label = 'Width', value = 9, min = 1, max = 20, step=0.5),
@@ -100,11 +145,12 @@ mod_gwas_ui <- function(id){
#' @importFrom stats BIC as.formula lm logLik median model.matrix na.omit prcomp qbeta quantile runif sd setNames
#' @importFrom bs4Dash updatebs4TabItems updateBox
#' @importFrom shiny updateTabsetPanel
+#' @importFrom plotly ggplotly
#' @noRd
mod_gwas_server <- function(input, output, session, parent_session){
ns <- session$ns
-
+
# Help links
observeEvent(input$goGWASpar, {
# change to help tab
@@ -150,9 +196,18 @@ mod_gwas_server <- function(input, output, session, parent_session){
output$gwas_stats <- renderDT(NULL)
##GWAS items
+ gwas_data <- reactiveValues(
+ data2 = NULL,
+ phenos = NULL
+ )
+
gwas_vars <- reactiveValues(
gwas_df = NULL,
+ gwas_df_filt = NULL,
+ fit_qtl = NULL,
manhattan_plots = NULL,
+ LD_plot = NULL,
+ bp_window = NULL,
qq_plots = NULL,
bic_df = NULL,
BIC_ggplot = NULL
@@ -177,7 +232,6 @@ mod_gwas_server <- function(input, output, session, parent_session){
#GWAS analysis (Shufen Chen and Meng Lin pipelines)
observeEvent(input$gwas_start, {
-
toggleClass(id = "gwas_ploidy", class = "borderred", condition = (is.na(input$gwas_ploidy) | is.null(input$gwas_ploidy)))
toggleClass(id = "trait_info", class = "borderred", condition = (all(is.na(input$trait_info)) | all(is.null(input$trait_info))))
@@ -271,7 +325,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
temp_geno_file <- tempfile(fileext = ".csv")
#Convert VCF file if submitted
- vcf <- read.vcfR(input$gwas_file$datapath)
+ vcf <- read.vcfR(input$gwas_file$datapath, verbose = FALSE)
#Extract GT
geno_mat <- extract.gt(vcf, element = "GT")
@@ -280,7 +334,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
info <- data.frame(vcf@fix)
gpoly_df <- cbind(info[,c("ID","CHROM","POS")], geno_mat)
- if(!any(colnames(gpoly_df) %in% phenotype_file$Sample_ID)) {
+ if(!any(colnames(gpoly_df) %in% phenotype_file[,1])) {
shinyalert(
title = "Samples ID do not match",
text = paste("Check if passport/phenotype files have same sample ID as the VCF/genotype file."),
@@ -298,7 +352,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
}
validate(
- need(any(colnames(gpoly_df) %in% phenotype_file$Sample_ID), "The selected traits must be numerical.")
+ need(any(colnames(gpoly_df) %in% phenotype_file[,1]), "The selected traits must be numerical.")
)
write.csv(gpoly_df, file = temp_geno_file, row.names = FALSE)
@@ -332,6 +386,32 @@ mod_gwas_server <- function(input, output, session, parent_session){
return()
}
+ gwas_vars$LD_plot <- LD.plot(data)
+ lim.d <- max(gwas_vars$LD_plot$data$d)
+
+ if(input$bp_window_before > lim.d)
+ shinyalert(
+ title = "Adjust base pair window",
+ text = paste0("Base pair window larger than maximum distance (",lim.d,"). Reduce window size."),
+ size = "s",
+ closeOnEsc = TRUE,
+ closeOnClickOutside = FALSE,
+ html = TRUE,
+ type = "error",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192",
+ showCancelButton = FALSE,
+ animation = TRUE
+ )
+
+ validate(
+ need(input$bp_window_before <= lim.d, paste0("Base pair window larger than maximum distance (",lim.d,"). Reduce window size."))
+ )
+
+ gwas_vars$bp_window <- input$bp_window_before
+ updateSliderInput(session = session, inputId = "bp_window_after", min = 0, max = round(lim.d,2), value = gwas_vars$bp_window, step = round(lim.d/150,4))
+
data.loco <- set.K(data,LOCO=F,n.core= as.numeric(cores))
#Delete temp pheno file
@@ -371,6 +451,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
kin.adj<-posdefmat(K)
kin.test<-as.matrix(kin.adj)
+ phenos <- vector()
for (i in 2:ncol(GE)){
#model selection
@@ -399,12 +480,6 @@ mod_gwas_server <- function(input, output, session, parent_session){
#Save BIC plot
gwas_vars$BIC_ggplot <- p1
- #Display BIC figure
- output$bic_plot <- renderPlot({
- print(p1)
- })
- #dev.off()
-
#Save BIC plot info
gwas_vars$bic_df <- plotBICs_kinship
@@ -428,7 +503,6 @@ mod_gwas_server <- function(input, output, session, parent_session){
#Consider adding options for different thresholds
data2 <- set.threshold(data.loco.scan,method=input$gwas_threshold,level=0.05)
-
#Save manhattan plots to list (only for single trait analysis)
#if length(traits) == 1
manhattan_plot_list <- list()
@@ -436,33 +510,6 @@ mod_gwas_server <- function(input, output, session, parent_session){
#plot for six models per trait
manhattan_plot_list[["all"]] <- manhattan.plot(data2,traits=colnames(data@pheno[i]), models = model)+geom_point(size=3)+theme(text = element_text(size = 25),strip.text = element_text(face = "bold"))
- #Output the manhattan plots
- output$manhattan_plot <- renderPlot({
-
- print(manhattan_plot_list[[input$model_select]])
-
- })
-
-
- #get most significant SNPs per QTL file
- qtl <- get.QTL(data=data2,traits=colnames(data@pheno[i]),bp.window=5e6)
- qtl_d <- data.frame(qtl)
-
- #Save QTL info
- gwas_vars$gwas_df <- qtl_d
-
- output$gwas_stats <- renderDT({qtl_d}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
-
- #Updating value boxes
- output$qtls_detected <- renderValueBox({
- valueBox(
- value = length(unique(qtl_d$Position)),
- subtitle = "QTLs Detected",
- icon = icon("dna"),
- color = "info"
- )
- })
-
#Status
updateProgressBar(session = session, id = "pb_gwas", value = 80, title = "GWAS Complete: Now Plotting Results")
@@ -472,18 +519,8 @@ mod_gwas_server <- function(input, output, session, parent_session){
#Save qq_plot info
gwas_vars$qq_plots <- data_qq
- output$qq_plot <- renderPlot({
- CMplot_shiny(data_qq,plot.type="q",col=c(1:8),
- ylab.pos=2,
- file.name=colnames(data@pheno[i]),
- conf.int=FALSE,
- box=F,multraits=TRUE,file.output=FALSE)
- })
-
#plot for each model per trait
for (j in 1:length(model)) {
- print(j)
-
data.loco.scan_2 <- GWASpoly(data=data.loco,models=model[j],
traits=colnames(data@pheno[i]),params=params,n.core= as.numeric(cores))
@@ -493,12 +530,206 @@ mod_gwas_server <- function(input, output, session, parent_session){
#Save manhattan plots
gwas_vars$manhattan_plots <- manhattan_plot_list
-
+ phenos[i] <- colnames(data@pheno[i])
}
+ gwas_data$data2 <- data2
+ gwas_data$phenos <- phenos[-which(is.na(phenos))]
+
+ qtl <- get.QTL(data=gwas_data$data2,traits=gwas_data$phenos,bp.window=0)
+ gwas_vars$gwas_df <- data.frame(qtl)
+
#Status
updateProgressBar(session = session, id = "pb_gwas", value = 100, status = "success", title = "Finished")
+ })
+
+ #Checking if any QTLs were detected and returning a user notice if not
+ observe({
+ req(gwas_vars$gwas_df)
+ if(dim(gwas_vars$gwas_df)[1] == 0) {
+ shinyalert(
+ title = "No QTL Detected",
+ text = "No QTL detected for this trait.",
+ size = "s",
+ closeOnEsc = TRUE,
+ closeOnClickOutside = FALSE,
+ html = TRUE,
+ type = "info",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192",
+ showCancelButton = FALSE,
+ animation = TRUE
+ )
+ }
+
+ #Gracefully abort
+ return()
+ })
+
+ #Updating value boxes
+ output$qtls_detected <- renderValueBox({
+ valueBox(
+ value = length(unique(gwas_vars$gwas_df_filt$Position)),
+ subtitle = "QTLs Detected",
+ icon = icon("dna"),
+ color = "info"
+ )
+ })
+
+ # Tables
+ output$all_qtl <- renderDT({
+ #get most significant SNPs per QTL file
+ validate(
+ need(dim(gwas_vars$gwas_df)[1] > 0, "No QTL detected.")
+ )
+ gwas_vars$gwas_df
+ }, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
+
+
+ output$gwas_stats <- renderDT({
+ #get most significant SNPs per QTL file
+ lim.d <- max(gwas_vars$LD_plot$data$d)
+
+ validate(
+ need(gwas_vars$bp_window <= lim.d, paste0("Base pair window larger than maximum distance (",lim.d,"). Reduce window size."))
+ )
+ if(is.null(input$bp_window_after)) {
+ line <- gwas_vars$bp_window
+ } else line <- input$bp_window_after
+
+ qtl <- get.QTL(data=gwas_data$data2,traits=gwas_data$phenos,bp.window=line*1000000)
+ gwas_vars$gwas_df_filt <- data.frame(qtl)
+
+ validate(
+ need(dim(gwas_vars$gwas_df_filt)[1] > 0, "No QTL detected.")
+ )
+ gwas_vars$gwas_df_filt
+ }, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
+
+
+ observe({
+ req(gwas_vars$gwas_df_filt, nrow(gwas_vars$gwas_df_filt) > 0)
+ updatePickerInput(session = session, inputId = "sele_models", choices = unique(gwas_vars$gwas_df_filt$Model), selected = unique(gwas_vars$gwas_df_filt$Model)[1])
+ })
+
+ observe({
+ req(gwas_vars$gwas_df_filt, nrow(gwas_vars$gwas_df_filt) > 0)
+
+ df <- gwas_vars$gwas_df_filt %>% filter(Model %in% input$sele_models)
+ updatePickerInput(session = session, inputId = "sele_qtl", choices = unique(paste0(df$Marker, "_", df$Model)),
+ selected = unique(paste0(df$Marker, "_", df$Model)))
+ })
+
+ output$gwas_fitqtl <- renderDT({
+ validate(
+ need(dim(gwas_vars$gwas_df_filt)[1] > 0, "No QTL detected.")
+ )
+
+ df <- gwas_vars$gwas_df_filt[which(paste0(gwas_vars$gwas_df_filt$Marker, "_", gwas_vars$gwas_df_filt$Model) %in% input$sele_qtl),]
+
+ rm.qtl <- which(df$Model %in% c("diplo-general", "diplo-additive"))
+ if(length(rm.qtl) > 0){
+ shinyalert(
+ title = "Oops",
+ text = "QTL detected by the models diplo-general and diplo-additive are not supported in the fit.QTL current version",
+ size = "xs",
+ closeOnEsc = TRUE,
+ closeOnClickOutside = FALSE,
+ html = TRUE,
+ type = "info",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192",
+ showCancelButton = FALSE,
+ imageUrl = "",
+ animation = TRUE,
+ )
+ }
+
+ if(length(df$Model) >0){
+ rm.qtl <- which(df$Model %in% c("diplo-general", "diplo-additive"))
+ if(length(rm.qtl) > 0){
+ warning("QTL detected by the models diplo-general and diplo-additive are not supported in the fit.QTL current version")
+ qtl <- df[-rm.qtl,]
+ } else qtl <- df
+
+ validate(
+ need(dim(qtl)[1] > 0, "No QTL evaluated")
+ )
+
+ fit.ans_temp <- fit.QTL(data=gwas_data$data2,
+ trait=input$trait_info,
+ qtl=qtl[,c("Marker","Model")])
+ gwas_vars$fit_qtl <- fit.ans_temp
+ } else gwas_vars$fit_qtl <- NULL
+
+ gwas_vars$fit_qtl
+ }, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
+
+ # Plots
+ #Output the manhattan plots
+ output$manhattan_plot <- renderPlot({
+ validate(
+ need(!is.null(gwas_vars$manhattan_plots), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
+ )
+ print(gwas_vars$manhattan_plots[[input$model_select]])
+
+ })
+ output$qq_plot <- renderPlot({
+ validate(
+ need(!is.null(gwas_vars$qq_plots), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
+ )
+ CMplot_shiny(gwas_vars$qq_plots,plot.type="q",col=c(1:8),
+ ylab.pos=2,
+ file.name=input$trait_info,
+ conf.int=FALSE,
+ box=F,
+ multraits=TRUE,
+ file.output=FALSE)
+ })
+
+ #Display BIC figure
+ output$bic_plot <- renderPlot({
+ validate(
+ need(!is.null(gwas_vars$BIC_ggplot), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
+ )
+
+ print(gwas_vars$BIC_ggplot)
+ })
+
+ output$LD_plot <- renderPlotly({
+
+ validate(
+ need(!is.null(gwas_vars$LD_plot), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
+ )
+
+ lim.d <- max(gwas_vars$LD_plot$data$d)
+
+ validate(
+ need(gwas_vars$bp_window <= lim.d, paste0("Base pair window larger than maximum distance (",lim.d,"). Reduce window size."))
+ )
+ if(is.null(input$bp_window_after)) {
+ line <- gwas_vars$bp_window
+ } else line <- input$bp_window_after
+
+ p <- gwas_vars$LD_plot + geom_vline(aes(xintercept=line, color = "bp window"),linetype="dashed") +
+ theme(legend.title=element_blank(), legend.position=c(1,1),legend.justification = c(1,1), text = element_text(size = 15)) +
+ labs(y = "R-squared")
+
+ updateNumericInput(session = session, inputId = "bp_window_before",value = line)
+
+ ggplotly(p) %>%
+ layout(
+ legend = list(
+ title = list(text = ''), # Explicitly remove legend title in plotly
+ x = 1, # X position (right)
+ y = 1, # Y position (top)
+ xanchor = 'right', # Anchor the legend at the right
+ yanchor = 'top' # Anchor the legend at the top
+ )
+ )
})
#Download files for GWAS
@@ -510,13 +741,27 @@ mod_gwas_server <- function(input, output, session, parent_session){
# Temporary files list
temp_dir <- tempdir()
temp_files <- c()
-
+
if (!is.null(gwas_vars$gwas_df)) {
# Create a temporary file for assignments
- gwas_file <- file.path(temp_dir, paste0("QTL-statistics-", Sys.Date(), ".csv"))
+ gwas_file <- file.path(temp_dir, paste0("QTL-Significant_Markers-statistics-", Sys.Date(), ".csv"))
write.csv(gwas_vars$gwas_df, gwas_file, row.names = FALSE)
temp_files <- c(temp_files, gwas_file)
}
+
+ if (!is.null(gwas_vars$gwas_df_filt)) {
+ # Create a temporary file for assignments
+ gwas_file <- file.path(temp_dir, paste0("QTL-LD-filtered-statistics-", Sys.Date(), ".csv"))
+ write.csv(gwas_vars$gwas_df_filt, gwas_file, row.names = FALSE)
+ temp_files <- c(temp_files, gwas_file)
+ }
+
+ if (!is.null(gwas_vars$fit_qtl)) {
+ # Create a temporary file for assignments
+ gwas_file <- file.path(temp_dir, paste0("Multiple-QTL-model-statistics-", Sys.Date(), ".csv"))
+ write.csv(gwas_vars$fit_qtl, gwas_file, row.names = FALSE)
+ temp_files <- c(temp_files, gwas_file)
+ }
if (!is.null(gwas_vars$bic_df)) {
# Create a temporary file for BIC data frame
@@ -564,6 +809,11 @@ mod_gwas_server <- function(input, output, session, parent_session){
req(gwas_vars$BIC_ggplot)
print(gwas_vars$BIC_ggplot)
+ } else if (input$gwas_figures == "LD Plot") {
+ req(gwas_vars$LD_plot)
+ #Plot
+ print(gwas_vars$LD_plot)
+
} else if (input$gwas_figures == "Manhattan Plot") {
req(gwas_vars$manhattan_plots, input$model_select)
#Plot
@@ -603,6 +853,72 @@ mod_gwas_server <- function(input, output, session, parent_session){
ex <- system.file("iris_passport_file.csv", package = "BIGapp")
file.copy(ex, file)
})
+
+ ##Summary Info
+ gwas_summary_info <- function() {
+ #Handle possible NULL values for inputs
+ dosage_file_name <- if (!is.null(input$gwas_file$name)) input$gwas_file$name else "No file selected"
+ passport_file_name <- if (!is.null(input$phenotype_file$name)) input$phenotype_file$name else "No file selected"
+ selected_ploidy <- if (!is.null(input$gwas_ploidy)) as.character(input$gwas_ploidy) else "Not selected"
+
+ #Print the summary information
+ cat(
+ "BIGapp GWAS Summary\n",
+ "\n",
+ paste0("Date: ", Sys.Date()), "\n",
+ paste("R Version:", R.Version()$version.string), "\n",
+ "\n",
+ "### Input Files ###\n",
+ "\n",
+ paste("Input Genotype File:", dosage_file_name), "\n",
+ paste("Input Passport File:", passport_file_name), "\n",
+ "\n",
+ "### User Selected Parameters ###\n",
+ "\n",
+ paste("Selected Ploidy:", selected_ploidy), "\n",
+ paste("Significance Threshold Method:", input$gwas_threshold), "\n",
+ paste("Selected Trait:", input$trait_info), "\n",
+ paste("Selected Fixed Effects:", input$fixed_info), "\n",
+ "\n",
+ "### R Packages Used ###\n",
+ "\n",
+ paste("BIGapp:", packageVersion("BIGapp")), "\n",
+ paste("AGHmatrix:", packageVersion("AGHmatrix")), "\n",
+ paste("ggplot2:", packageVersion("ggplot2")), "\n",
+ paste("GWASpoly:", packageVersion("GWASpoly")), "\n",
+ paste("vcfR:", packageVersion("vcfR")), "\n",
+ paste("Matrix:", packageVersion("Matrix")), "\n",
+ sep = ""
+ )
+ }
+
+ # Popup for analysis summary
+ observeEvent(input$gwas_summary, {
+ showModal(modalDialog(
+ title = "Summary Information",
+ size = "l",
+ easyClose = TRUE,
+ footer = tagList(
+ modalButton("Close"),
+ downloadButton("download_gwas_info", "Download")
+ ),
+ pre(
+ paste(capture.output(gwas_summary_info()), collapse = "\n")
+ )
+ ))
+ })
+
+
+ # Download Summary Info
+ output$download_gwas_info <- downloadHandler(
+ filename = function() {
+ paste("gwas_summary_", Sys.Date(), ".txt", sep = "")
+ },
+ content = function(file) {
+ # Write the summary info to a file
+ writeLines(paste(capture.output(gwas_summary_info()), collapse = "\n"), file)
+ }
+ )
}
## To be copied in the UI
diff --git a/R/mod_help.R b/R/mod_help.R
index 19960f0..8020916 100644
--- a/R/mod_help.R
+++ b/R/mod_help.R
@@ -13,21 +13,21 @@ mod_help_ui <- function(id){
fluidPage(
column(width=12),
column(width=12,
- box(title="DArT Report2VCF", id = "DArT_Report2VCF_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
- "**Draft**This tab is designed to convert the DArT Dose Report and Counts files to a VCF file. **DArT Website**",
+ box(title="Convert to VCF", id = "DArT_Report2VCF_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "This tab converts the processed genotype and counts files from DArT into a VCF file (v4.3). This file can then be used as the genotype input for the analyses within BIGapp or used with other genomics applications.",
br(), br(),
bs4Dash::tabsetPanel(id = "DArT_Report2VCF_tabset",
- tabPanel("Parameters description", value = "DArT_Report2VCF_par",
+ tabPanel("Parameters description", value = "DArT_Report2VCF_par", br(),
includeMarkdown(system.file("help_files/DArT_Report2VCF_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "DArT_Report2VCF_results",
+ tabPanel("Results description", value = "DArT_Report2VCF_results", br(),
includeMarkdown(system.file("help_files/DArT_Report2VCF_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "DArT_Report2VCF_cite",
+ tabPanel("How to cite", value = "DArT_Report2VCF_cite", br(),
includeMarkdown(system.file("help_files/DArT_Report2VCF_cite.Rmd", package = "BIGapp"))
))
),
- box(title="Updog Dosage Calling", id = "Updog_Dosage_Calling_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ box(title="Dosage Calling", id = "Updog_Dosage_Calling_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
"This tab is designed to handle the process of dosage calling in genomic data. Dosage calling is essential for determining the number of copies of a particular allele at each genomic location.",
br(), br(),
bs4Dash::tabsetPanel(id = "Updog_Dosage_Calling_tabset",
@@ -42,52 +42,58 @@ mod_help_ui <- function(id){
))
),
box(title="VCF Filtering", id = "VCF_Filtering_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "Filter SNPs and samples in a VCF file based on missing data, minor allele frequency, read depth, and Updog dosage calling metrics",
+ br(), br(),
bs4Dash::tabsetPanel(id = "VCF_Filtering_tabset",
- tabPanel("Parameters description", value = "VCF_Filtering_par",
+ tabPanel("Parameters description", value = "VCF_Filtering_par", br(),
includeMarkdown(system.file("help_files/VCF_Filtering_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "VCF_Filtering_results",
- includeMarkdown(system.file("help_files/VCF_Filtering_par.Rmd", package = "BIGapp"))
+ tabPanel("Results description", value = "VCF_Filtering_results", br(),
+ includeMarkdown(system.file("help_files/VCF_Filtering_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "Updog_Dosage_Calling_cite",
+ tabPanel("How to cite", value = "VCF_Filtering_cite", br(),
includeMarkdown(system.file("help_files/VCF_Filtering_cite.Rmd", package = "BIGapp"))
))
),
box(title="PCA", id = "PCA_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "This tab is used to perform a PCA to visualize the genomic relationships between samples (population structure)",
+ br(), br(),
bs4Dash::tabsetPanel(id = "PCA_tabset",
- tabPanel("Parameters description", value = "PCA_par",
+ tabPanel("Parameters description", value = "PCA_par", br(),
includeMarkdown(system.file("help_files/PCA_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "PCA_results",
+ tabPanel("Results description", value = "PCA_results", br(),
includeMarkdown(system.file("help_files/PCA_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "PCA_cite",
+ tabPanel("How to cite", value = "PCA_cite", br(),
includeMarkdown(system.file("help_files/PCA_cite.Rmd", package = "BIGapp"))
))
),
box(title="DAPC", id = "DAPC_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "This tab group estimates the number of distinct groups that are present within the genomic dataset, and classifies each sample into a distinct group.",
+ br(), br(),
bs4Dash::tabsetPanel(id = "DAPC_tabset",
- tabPanel("Parameters description", value = "DAPC_par",
+ tabPanel("Parameters description", value = "DAPC_par", br(),
includeMarkdown(system.file("help_files/DAPC_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "DAPC_results",
+ tabPanel("Results description", value = "DAPC_results", br(),
includeMarkdown(system.file("help_files/DAPC_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "DAPC_cite",
+ tabPanel("How to cite", value = "DAPC_cite", br(),
includeMarkdown(system.file("help_files/DAPC_cite.Rmd", package = "BIGapp"))
))
),
box(title="Genomic Diversity", id = "Genomic_Diversity_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
- "**Draft**This tab is dedicated to analyzing genomic diversity within the population. It calculates various diversity metrics such as heterozygosity and minor allele frequency (MAF). The app includes functionalities to visualize these metrics through histograms and other plots. Users can download the calculated diversity metrics as CSV files. This tab helps in understanding the genetic variability and distribution of alleles within the population.",
+ "This tab estimates summary metrics for the samples and SNPs within a genomic dataset and produces figures and tables.",
br(), br(),
bs4Dash::tabsetPanel(id = "Genomic_Diversity_tabset",
- tabPanel("Parameters description", value = "Genomic_Diversity_par",
+ tabPanel("Parameters description", value = "Genomic_Diversity_par", br(),
includeMarkdown(system.file("help_files/Genomic_Diversity_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "Genomic_Diversity_results",
+ tabPanel("Results description", value = "Genomic_Diversity_results", br(),
includeMarkdown(system.file("help_files/Genomic_Diversity_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "Genomic_Diversity_cite",
+ tabPanel("How to cite", value = "Genomic_Diversity_cite", br(),
includeMarkdown(system.file("help_files/Genomic_Diversity_cite.Rmd", package = "BIGapp"))
))
),
@@ -106,26 +112,30 @@ mod_help_ui <- function(id){
))
),
box(title="Predictive Ability", id = "Predictive_Ability_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "This tab provides the predictive ability of a GBLUP model for each trait across all samples within a genomic dataset",
+ br(), br(),
bs4Dash::tabsetPanel(id = "Predictive_Ability_tabset",
- tabPanel("Parameters description", value = "Predictive_Ability_par",
+ tabPanel("Parameters description", value = "Predictive_Ability_par", br(),
includeMarkdown(system.file("help_files/Predictive_Ability_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "Predictive_Ability_results",
+ tabPanel("Results description", value = "Predictive_Ability_results", br(),
includeMarkdown(system.file("help_files/Predictive_Ability_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "Predictive_Ability_cite",
+ tabPanel("How to cite", value = "Predictive_Ability_cite", br(),
includeMarkdown(system.file("help_files/Predictive_Ability_cite.Rmd", package = "BIGapp"))
))
),
box(title="Genomic Prediction", id = "Genomic_Prediction_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE,
+ "his tab estimates the trait and estimated-breeding-values (EBVs) for either all individuals in a genomic dataset, or by training the model with one genomic dataset to predict the values in another.",
+ br(), br(),
bs4Dash::tabsetPanel(id = "Genomic_Prediction_tabset",
- tabPanel("Parameters description", value = "Genomic_Prediction_par",
+ tabPanel("Parameters description", value = "Genomic_Prediction_par", br(),
includeMarkdown(system.file("help_files/Genomic_Prediction_par.Rmd", package = "BIGapp"))
),
- tabPanel("Results description", value = "Genomic_Prediction_results",
+ tabPanel("Results description", value = "Genomic_Prediction_results", br(),
includeMarkdown(system.file("help_files/Genomic_Prediction_res.Rmd", package = "BIGapp"))
),
- tabPanel("How to cite", value = "Genomic_Prediction_cite",
+ tabPanel("How to cite", value = "Genomic_Prediction_cite", br(),
includeMarkdown(system.file("help_files/Genomic_Prediction_cite.Rmd", package = "BIGapp"))
))
),
diff --git a/R/run_app.R b/R/run_app.R
index 5d60ac1..baf85e7 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -7,6 +7,7 @@
#' @export
#' @importFrom shiny shinyApp
#' @importFrom golem with_golem_options
+#'
run_app <- function(
onStart = NULL,
options = list(),
@@ -14,6 +15,10 @@ run_app <- function(
uiPattern = "/",
...
) {
+ #Uncomment the sink command to allow console output
+ sink(file = tempfile())
+ options(warn = -1)
+
with_golem_options(
app = shinyApp(
ui = app_ui,
diff --git a/R/utils.R b/R/utils.R
index 975e339..096f3ef 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -4,7 +4,11 @@ get_counts <- function(madc_file, output_name) {
# Note: This assumes that the first 7 rows are not useful here like in the Strawberry DSt23-8501_MADC file
# Read the madc file
- madc_df <- read.csv(madc_file, sep = ',', skip = 7, check.names = FALSE)
+ madc_df <- read.csv(madc_file, sep = ',', check.names = FALSE, header = FALSE)
+ header <- grep("AlleleID", madc_df[,1])
+ if(header > 1) madc_df <- madc_df[-c(1:(grep("AlleleID", madc_df[,1]))-1),]
+ colnames(madc_df) <- madc_df[1,]
+ madc_df <- madc_df[-1,]
# Retain only the Ref and Alt haplotypes
filtered_df <- madc_df[!grepl("\\|AltMatch|\\|RefMatch", madc_df$AlleleID), ]
@@ -21,23 +25,23 @@ get_counts <- function(madc_file, output_name) {
#Add functionality here to stop the script if indentical() is False
get_matrices <- function(result_df) {
#This function takes the dataframe of ref and alt counts for each sample, and converts them to ref, alt, and size(total count) matrices for Updog
-
+
update_df <- result_df
-
+
# Filter rows where 'AlleleID' ends with 'Ref'
ref_df <- subset(update_df, grepl("Ref$", AlleleID))
-
+
# Filter rows where 'AlleleID' ends with 'Alt'
alt_df <- subset(update_df, grepl("Alt$", AlleleID))
-
+
#Ensure that each has the same SNPs and that they are in the same order
same <- identical(alt_df$CloneID,ref_df$CloneID)
-
+
###Convert the ref and alt counts into matrices with the CloneID as the index
#Set SNP names as index
row.names(ref_df) <- ref_df$CloneID
row.names(alt_df) <- alt_df$CloneID
-
+
#Retain only the rows in common if they are not identical and provide warning
if (same == FALSE) {
warning("Mismatch between Ref and Alt Markers. MADC likely altered. Markers without a Ref or Alt match removed.")
@@ -47,26 +51,30 @@ get_matrices <- function(result_df) {
ref_df <- ref_df[common_ids, ]
alt_df <- alt_df[common_ids, ]
}
-
+
#Remove unwanted columns and convert to matrix
- ref_matrix <- as.matrix(ref_df[, -c(1:16)])
- alt_matrix <- as.matrix(alt_df[, -c(1:16)])
-
+ rm.col <- c("AlleleID", "CloneID", "AlleleSequence", "ClusterConsensusSequence",
+ "CallRate", "OneRatioRef", "OneRatioSnp", "FreqHomRef", "FreqHomSnp",
+ "FreqHets", "PICRef", "PICSnp", "AvgPIC", "AvgCountRef", "AvgCountSnp","RatioAvgCountRefAvgCountSnp")
+
+ ref_matrix <- as.matrix(ref_df[, -which(colnames(ref_df) %in% rm.col)])
+ alt_matrix <- as.matrix(alt_df[, -which(colnames(alt_df) %in% rm.col)])
+
#Convert elements to numeric
class(ref_matrix) <- "numeric"
class(alt_matrix) <- "numeric"
-
+
#Make the size matrix by combining the two matrices
size_matrix <- (ref_matrix + alt_matrix)
-
+
#Count the number of cells with 0 count to estimate missing data
# Count the number of cells with the value 0
count_zeros <- sum(size_matrix == 0)
-
+
# Print the result
ratio_missing_data <- count_zeros / length(size_matrix)
cat("Ratio of missing data =", ratio_missing_data, "\n")
-
+
# Return the ref and alt matrices as a list
matrices_list <- list(ref_matrix = ref_matrix, size_matrix = size_matrix)
return(matrices_list)
@@ -202,7 +210,7 @@ calculate_heterozygosity <- function(genotype_matrix, ploidy = 2) {
# Create a dataframe with Sample ID and Observed Heterozygosity
result_df <- data.frame(
SampleID = colnames(genotype_matrix),
- ObservedHeterozygosity = heterozygosity_proportion,
+ Ho = heterozygosity_proportion,
row.names = NULL,
check.names = FALSE
)
diff --git a/README.md b/README.md
index 09ca3c4..1f3f10d 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@
[![Development](https://img.shields.io/badge/development-active-blue.svg)](https://img.shields.io/badge/development-active-blue.svg)
-# (B)reeding (I)nsight (G)enomics app
+# (B)reeding (I)nsight (G)enomics app
-Currently, Breeding Insight provides bioinformatic processing support for our external collaborators. This R shiny app will provide a web-based user friendly way for our internal and external collaborators to analyze genomic data without needing to use command-line tools.
+The BIGapp is a user-friendly tool for processing low to mid-density genotyping data for diploid and polyploid species. This R shiny app provides a web-based user friendly way for users to analyze genomic data without needing to use command-line tools. Additional analysis will be added, with the initial focus on a core set of features for supporting breeding decisions.
### Supported Analyses
@@ -17,16 +17,17 @@ Supported:
- SNP filtering
- Sample filtering
- Summary metrics
- - SNP Allele Frequency
+ - SNP Polymorphism Information Content
- SNP Minor Allele Frequency
- Sample Observed Heterozygosity
- Population Structure
- PCA
- DAPC
- GWAS
+ - GWASpoly
- GS
- Estimate Model Prediction Accuracy
- - Predict Trait Values for New Genotypes
+ - Predict Phenotype Values and EBVs for Samples
### Running the BIG app
diff --git a/inst/app/www/BIG_R_logo.png b/inst/app/www/BIG_R_logo.png
index f654b75..0599b1e 100644
Binary files a/inst/app/www/BIG_R_logo.png and b/inst/app/www/BIG_R_logo.png differ
diff --git a/inst/app/www/BIG_logo_edit.png b/inst/app/www/BIG_logo_edit.png
deleted file mode 100644
index 0c2b780..0000000
Binary files a/inst/app/www/BIG_logo_edit.png and /dev/null differ
diff --git a/inst/app/www/favicon.ico b/inst/app/www/favicon.ico
index ca0b26c..c301bc1 100644
Binary files a/inst/app/www/favicon.ico and b/inst/app/www/favicon.ico differ
diff --git a/inst/app/www/golem_favicon.ico b/inst/app/www/golem_favicon.ico
deleted file mode 100644
index 4c0982c..0000000
Binary files a/inst/app/www/golem_favicon.ico and /dev/null differ
diff --git a/inst/help_files/DAPC_cite.Rmd b/inst/help_files/DAPC_cite.Rmd
index 1458b7c..c590227 100644
--- a/inst/help_files/DAPC_cite.Rmd
+++ b/inst/help_files/DAPC_cite.Rmd
@@ -4,3 +4,16 @@ output: html_document
date: "2024-08-29"
---
+* **BIGapp**
+
+* **BIGr**
+
+* **vcfR**
+
+Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549.
+
+Knaus BJ, Grünwald NJ (2016). “VcfR: an R package to manipulate and visualize VCF format data.” BioRxiv. https://dx.doi.org/10.1101/041277.
+
+* **adegenet**
+
+Jombart, T. (2008). adegenet: a R package for the multivariate analysis of genetic markers. Bioinformatics, 24(11), 1403–1405.
diff --git a/inst/help_files/DAPC_par.Rmd b/inst/help_files/DAPC_par.Rmd
index b566e42..ce4577a 100644
--- a/inst/help_files/DAPC_par.Rmd
+++ b/inst/help_files/DAPC_par.Rmd
@@ -4,3 +4,30 @@ output: html_document
date: "2024-08-29"
---
+* **VCF file**
+Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Passport file**
+A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID. Example:
+
+
+
+|Sample_ID | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width|Species |
+|:---------:|:------------:|:-----------:|:------------:|:-----------:|:-------:|
+|Sample_1 | 5.1| 3.5| 1.4| 0.2|versicolor |
+|Sample_2 | 4.9| 3.0| 1.4| 0.2|setosa |
+|Sample_3 | 4.7| 3.2| 1.3| 0.2|setosa |
+|Sample_4 | 4.6| 3.1| 1.5| 0.2|setosa |
+|Sample_5 | 5.0| 3.6| 1.4| 0.2|setosa |
+|Sample_6 | 5.4| 3.9| 1.7| 0.4|setosa |
+
+
+
+
+
+* **Species Ploidy**
+Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+* **Maximum K**
+
+* **Number of Clusters (K)**
diff --git a/inst/help_files/DAPC_res.Rmd b/inst/help_files/DAPC_res.Rmd
index 0932956..f31e234 100644
--- a/inst/help_files/DAPC_res.Rmd
+++ b/inst/help_files/DAPC_res.Rmd
@@ -4,3 +4,36 @@ output: html_document
date: "2024-08-29"
---
+#### BIC Table
+
+- **Description**: The BIC table summarizes the association between different numbers of clusters (K) and their Bayesian Information Criterion (BIC) values.
+- **Interpretation**: Each row corresponds to a different number of clusters (K), with the BIC value indicating the model fit. Typically, lower BIC values suggest a better model fit, pointing to the optimal number of genetic clusters. This can be viewed as the "knee", where the BIC values stabilize after decreasing sharply.
+- **Use**: In breeding applications, the table helps identify the most suitable number of genetic clusters, providing insights into genetic diversity and structure that inform breeding choices.
+
+#### BIC Plot
+
+- **Description**: The BIC plot graphically displays the BIC values for different numbers of clusters (K) derived from the data, typically to determine the optimal number of clusters.
+- **Interpretation**: The x-axis presents different cluster numbers (K), while the y-axis indicates corresponding BIC values. The optimal number of clusters is suggested by an "elbow" or point where BIC values level off, indicating diminishing returns for increasing the number of clusters.
+- **Use**: This plot is crucial for deciding how many distinct genetic clusters exist within the population. In breeding, it helps determine the number of genetically distinct groups worth considering in breeding programs. In population genetics, it guides researchers in identifying sub-population structures.
+
+#### DAPC Results Table
+
+- **Description**: The DAPC results table provides a detailed view of sample membership probabilities across different genetic clusters, and identifies the most likely cluster assignment for each sample.
+- **Interpretation**: Each row corresponds to a specific sample, with columns representing the probability of membership in each cluster based on discriminant analysis. The final column, "Cluster_Assignment," indicates the cluster to which a sample is most strongly associated, based on the highest probability value.
+- **Use**: This table is instrumental in both breeding and population genetics. In breeding, it aids in identifying which genetic clusters individuals belong to, helping in the selection of genetically favorable traits. In population genetics, it helps delineate the distribution of genetic diversity across the population, facilitating studies on migration, adaptation, and evolutionary patterns.
+
+- **Example Table**:
+
+ | Sample | Cluster_1 | Cluster_2 | Cluster_3 | Cluster_Assignment |
+ |:--------:|:---------:|:---------:|:---------:|:------------------:|
+ | Sample_1 | 0.80 | 0.10 | 0.10 | 1 |
+ | Sample_2 | 0.20 | 0.70 | 0.10 | 2 |
+ | Sample_3 | 0.15 | 0.15 | 0.70 | 3 |
+ | Sample_4 | 0.40 | 0.50 | 0.10 | 2 |
+ | Sample_5 | 0.60 | 0.20 | 0.20 | 1 |
+
+#### DAPC Plot
+
+- **Description**: The DAPC plot visualizes the discriminant analysis results, showing genetic sample distribution based on discriminant functions.
+- **Interpretation**: Each point represents a genetic sample, plotted based on its discriminant function scores, which are linear combinations of principal components. Clusters of points suggest genetic similarity or differentiation.
+- **Use**: This plot is instrumental in distinguishing between genetic clusters and assessing population structure. For breeding applications, it facilitates the selection of genetically similar or diverse individuals suited for breeding objectives. In population genetics studies, it provides insights into the genetic differentiation between groups.
diff --git a/inst/help_files/DArT_Report2VCF_cite.Rmd b/inst/help_files/DArT_Report2VCF_cite.Rmd
index d3be88a..7ff8c10 100644
--- a/inst/help_files/DArT_Report2VCF_cite.Rmd
+++ b/inst/help_files/DArT_Report2VCF_cite.Rmd
@@ -4,3 +4,13 @@ output: html_document
date: "2024-08-29"
---
+* **BIGapp**
+
+
+* **BIGr**
+
+* **vcfR**
+
+Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549.
+
+Knaus BJ, Grünwald NJ (2016). “VcfR: an R package to manipulate and visualize VCF format data.” BioRxiv. https://dx.doi.org/10.1101/041277.
diff --git a/inst/help_files/DArT_Report2VCF_par.Rmd b/inst/help_files/DArT_Report2VCF_par.Rmd
index ccf8d3c..4e6447f 100644
--- a/inst/help_files/DArT_Report2VCF_par.Rmd
+++ b/inst/help_files/DArT_Report2VCF_par.Rmd
@@ -4,3 +4,14 @@ output: html_document
date: "2024-08-29"
---
+* **DArTag Dosage Report**
+
+The DArT Dosage Report is a tab-separated file provided by DArT from a sequencing project. It contains the genotype information for each of the target markers for all samples in the sequencing project. The markers are in rows and the samples are in the columns. There are several summary metric columns that preceed the sample genotype columns. The genotype calls are the count of the reference allele, where 0 is equal to homozygous alternate.
+
+* **DArTag Counts File**
+
+The DArT counts file is a tab-separated file provided by DArT from a sequencing project. It contains the read count information for the referance and alternate allele at each target marker. The marker information are in the rows and the samples are in the columns. There are several information columns that preceed the sample columns. There are two versions of this file. The “collapsed counts” version contains the target markers that includes their multiallic read counts in their total counts. The “Counts” file contains the read counts for the target markers only (excluding the multiallelic read count information).
+
+* **Species Ploidy**
+
+Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
diff --git a/inst/help_files/DArT_Report2VCF_res.Rmd b/inst/help_files/DArT_Report2VCF_res.Rmd
index 8f71982..0c70d3f 100644
--- a/inst/help_files/DArT_Report2VCF_res.Rmd
+++ b/inst/help_files/DArT_Report2VCF_res.Rmd
@@ -4,3 +4,6 @@ output: html_document
date: "2024-08-29"
---
+* **VCF file (v4.3)**
+
+Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is converted from the numeric dosage call information. Included is the read counts for each marker/sample and the numeric dosage call (UD) data. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
diff --git a/inst/help_files/GWAS_par.Rmd b/inst/help_files/GWAS_par.Rmd
index 8a8aaff..7082696 100644
--- a/inst/help_files/GWAS_par.Rmd
+++ b/inst/help_files/GWAS_par.Rmd
@@ -35,3 +35,7 @@ date: "2024-08-29"
* **Number of CPU Cores**: Defines the number of CPU cores to be used for the GWAS analysis, enabling faster processing by splitting the workload across multiple cores.
+BIGapp uses GWASpoly random polygenic effect to control for population structure. By default, all markers are used to calculate a single covariance matrix (parameter LOCO = FALSE in GWASpoly set.k function).
+
+BIGapp tests the inclusion of principal components as fixed effects (P + K model). For that, the BIC is calculated for models including 1 to 10 of the first principal components and the kinship matrix. In this step, the mixed model for GWAS is fitted using mixed.solve function of rrBLUP. Then, using the estimated parameters, log-likelihood is calculated by using the equation (2) in Kang et al., 2008. Finally, BIC is calculated by using the standard formula (BIC = K * log(N) - 2 * LL).
+
diff --git a/inst/help_files/GWAS_res.Rmd b/inst/help_files/GWAS_res.Rmd
index 80003a4..8d11df5 100644
--- a/inst/help_files/GWAS_res.Rmd
+++ b/inst/help_files/GWAS_res.Rmd
@@ -4,16 +4,23 @@ output: html_document
date: "2024-08-29"
---
-* BIC plot
-
-* BIC Table
-
-* LD plot
-
-* Manhattan Plot
-
-* QQ Plot
-
-* QTL - significant markers
-
-* Multiple QTL model results table
+* **BIC plot**
+Plot of the BIC of the tested models including PCs and kinship. The model using the number of PC that resulted in the lower BIC is the one used by BIGapp.
+
+* **BIC Table**
+Table with BIC for the tests including PC and kinship. The model using the number of PC that resulted in the lower BIC is the one used by BIGapp.
+
+* **LD plot**
+Plot LD vs distance. A monotone decreasing, convex spline is fit using R package scam.
+
+* **Manhattan Plot**
+From GWASpoly documentation: Results for the ref and alt versions of the dominance model are combined. If data is the output from set.threshold, then the threshold is displayed as a horizontal dashed line when models contains a single model. Because the threshold varies between models, it is not drawn when multiple models are included. Although the ref and alt versions of each dominance model are slightly different (as seen with qq.plot), they are treated as a single model for the Manhattan plot, and the average threshold is shown.
+
+* **QQ Plot**
+From GWASpoly documentation: One of the standard diagnostics in GWAS is to check the inflation of the -log10(p) values (aka “scores”). This can be done using a quantile-quantile plot of the observed vs. expected values under the null hypothesis, which follows a uniform distribution and is shown with a dotted line
+
+* **QTL - significant markers**
+Describes significant markers after screening with GWASpoly function
+
+* **Multiple QTL model results table**
+Results after fit.QTL function
diff --git a/inst/help_files/Genomic_Diversity_cite.Rmd b/inst/help_files/Genomic_Diversity_cite.Rmd
index b85b481..1870dbc 100644
--- a/inst/help_files/Genomic_Diversity_cite.Rmd
+++ b/inst/help_files/Genomic_Diversity_cite.Rmd
@@ -4,3 +4,12 @@ output: html_document
date: "2024-08-29"
---
+* **BIGapp**
+
+* **BIGr**
+
+* **vcfR**
+
+Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549.
+
+Knaus BJ, Grünwald NJ (2016). “VcfR: an R package to manipulate and visualize VCF format data.” BioRxiv. https://dx.doi.org/10.1101/041277.
diff --git a/inst/help_files/Genomic_Diversity_par.Rmd b/inst/help_files/Genomic_Diversity_par.Rmd
index b236539..c4581aa 100644
--- a/inst/help_files/Genomic_Diversity_par.Rmd
+++ b/inst/help_files/Genomic_Diversity_par.Rmd
@@ -4,3 +4,6 @@ output: html_document
date: "2024-08-29"
---
+* **VCF file**: Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Species Ploidy**: Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
diff --git a/inst/help_files/Genomic_Diversity_res.Rmd b/inst/help_files/Genomic_Diversity_res.Rmd
index 752af92..347820b 100644
--- a/inst/help_files/Genomic_Diversity_res.Rmd
+++ b/inst/help_files/Genomic_Diversity_res.Rmd
@@ -4,3 +4,38 @@ output: html_document
date: "2024-08-29"
---
+#### Dosage Ratio Plot
+
+- **Description**: The dosage ratio box plot illustrates the distribution of different genotypic dosage levels across the genomic dataset.
+- **Interpretation**: Each box in the plot corresponds to a specific dosage level, showing the proportion of samples exhibiting each dosage. These levels typically indicate the number of copies of a particular allele.
+- **Use**: This plot is beneficial in understanding the distribution of allelic dosages in the population.
+
+#### MAF Plot
+
+- **Description**: The MAF plot shows the minor allele frequency distribution for the SNPs within the dataset.
+- **Interpretation**: The x-axis represents SNPs, while the y-axis depicts the frequency of the less common allele. High MAF values indicate prevalent mutations, whereas low values suggest rarity.
+- **Use**: This visualization helps in assessing allele variation across the population, essential for breeding decisions aiming to maximize genetic diversity. It's crucial for identifying potentially beneficial minor alleles in a population genetics context or ensuring adequate filtering prior to downstream analyses such as GWAS.
+
+#### OHet Plot
+
+- **Description**: The OHet plot displays the observed heterozygosity for samples, illustrating genetic variability.
+- **Interpretation**: The y-axis may represent heterozygosity levels, with distinct individuals on the x-axis. Variation indicates the degree of genetic diversity within samples.
+- **Use**: High heterozygosity is often associated with genetic health and adaptability, making this plot valuable for breeding insight. In population genetics, it aids understanding of population diversity and potential inbreeding effects.
+
+#### Marker Distribution Plot
+
+- **Description**: This plot provides a spatial distribution of markers across different chromosomes or linkage groups.
+- **Interpretation**: Points correspond to SNPs or markers, plotted along chromosomal positions. Patterns suggest marker density and distribution.
+- **Use**: Insight into marker distribution is critical for genomic selection and breeding strategy, ensuring comprehensive genomic coverage. This is also a good visual to determine if markers were filtered too strictly or sequenced poorly for a given project.
+
+#### Sample Table
+
+- **Description**: The sample table includes a list of sample IDs with associated summary statistics, such as heterozygosity.
+- **Interpretation**: Each row details a specific sample, providing key genetic metrics valuable for assessing sample quality and genetic integrity.
+- **Use**: This table supports the evaluation of sample quality and diversity, crucial for selecting appropriate breeding candidates or conducting robust population analyses.
+
+#### SNP Table
+
+- **Description**: The SNP table features SNP IDs alongside their summary statistics, including metrics like Minor Allele Frequency (MAF) and Polymorphism Information Content (PIC).
+- **Interpretation**: The table provides insight into each SNP's genetic contribution and informativeness, facilitating genome-wide association studies and diversity assessments.
+- **Use**: By highlighting key SNP metrics, this table assists in choosing markers for selection in breeding programs and understanding diversity in population genetics.
diff --git a/inst/help_files/Genomic_Prediction_cite.Rmd b/inst/help_files/Genomic_Prediction_cite.Rmd
index 55097f3..1e59726 100644
--- a/inst/help_files/Genomic_Prediction_cite.Rmd
+++ b/inst/help_files/Genomic_Prediction_cite.Rmd
@@ -4,3 +4,18 @@ output: html_document
date: "2024-08-29"
---
+* **BIGapp**
+
+* **vcfR**
+
+Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549.
+
+Knaus BJ, Grünwald NJ (2016). “VcfR: an R package to manipulate and visualize VCF format data.” BioRxiv. https://dx.doi.org/10.1101/041277.
+
+* **rrBLUP**
+
+Endelman JB (2011). “Ridge regression and other kernels for genomic selection with R package rrBLUP.” Plant Genome, 4, 250-255.
+
+* **AGHmatrix**
+
+R Amadeu R, Franco Garcia A, Munoz P, V Ferrao L (2023). “AGHmatrix: genetic relationship matrices in R .” Bioinformatics, 39(7).
diff --git a/inst/help_files/Genomic_Prediction_par.Rmd b/inst/help_files/Genomic_Prediction_par.Rmd
index b4b18cb..133627c 100644
--- a/inst/help_files/Genomic_Prediction_par.Rmd
+++ b/inst/help_files/Genomic_Prediction_par.Rmd
@@ -4,3 +4,32 @@ output: html_document
date: "2024-08-29"
---
+This tab estimates the trait and estimated-breeding-values (EBVs) for either all individuals in a genomic dataset, or by training the model with one genomic dataset to predict the values in another. The trait and EBV information can then be used to make selections for the next breeding cycle.
+
+* **VCF file**: Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Passport file**: A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID. Example:
+
+
+
+|Sample_ID | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width|Species |
+|:---------:|:------------:|:-----------:|:------------:|:-----------:|:-------:|
+|Sample_1 | 5.1| 3.5| 1.4| 0.2|versicolor |
+|Sample_2 | 4.9| 3.0| 1.4| 0.2|setosa |
+|Sample_3 | 4.7| 3.2| 1.3| 0.2|setosa |
+|Sample_4 | 4.6| 3.1| 1.5| 0.2|setosa |
+|Sample_5 | 5.0| 3.6| 1.4| 0.2|setosa |
+|Sample_6 | 5.4| 3.9| 1.7| 0.4|setosa |
+
+
+
+
+
+* **Prediction VCF file**(optional): Variant Call Format (VCF). See above for required formatting details.
+
+* **Species Ploidy**: Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+* **Matrix type**: Specifies the matrix type to use for the GBLUP prediction model. The choices are:
+ * **Gmatrix**: An additive relationship matrix between all samples is created using the genotype information from the VCF file
+ * **Amatrix**: An additive relationship matrix between all samples is created using a user supplied pedigree file
+ * **Hmatrix**: An additive relationship matrix between all samples is created by using the information from both the VCF file and the pedigree file
diff --git a/inst/help_files/Genomic_Prediction_res.Rmd b/inst/help_files/Genomic_Prediction_res.Rmd
index c48e1ed..a028faa 100644
--- a/inst/help_files/Genomic_Prediction_res.Rmd
+++ b/inst/help_files/Genomic_Prediction_res.Rmd
@@ -4,3 +4,37 @@ output: html_document
date: "2024-08-29"
---
+* **Predicted Trait table**: The trait values are predicted for all samples in either the input VCF file (if only one provided), or for all of the samples in the predictive VCF file. It is in the format of samples IDs in the first column, and each subsequent column being the information for the traits selected by the user.
+
+
+
+| Sample ID | Sepal.Length|Sepal.Width |
+|------------|--------------|-------------|
+| Sample_1 | 4.8 | 3.5 |
+| Sample_2 | 4.9 | 3.0 |
+| Sample_3 | 4.7 | 3.2 |
+| Sample_4 | 4.6 | 3.1 |
+| Sample_5 | 5.0 | 3.6 |
+| Sample_6 | 5.4 | 3.9 |
+
+
+
+
+
+* **EBV table**: Estimated Breeding Values (EBVs) from genomic prediction are statistical estimates of an individual's genetic potential for a specific trait, calculated by combining genomic information with phenotypic and pedigree data. These values help predict an organism's ability to pass on desirable traits to its offspring, allowing for more accurate selection in breeding programs. The EBVs are predicted for all samples in either the input VCF file (if only one provided), or for all of the samples in the predictive VCF file. It is in the format of samples IDs in the first column, and each subsequent column being the information for the traits selected by the user.
+
+
+
+
+| Sample ID | Sepal.Length | Sepal.Width |
+|------------|--------------|-------------|
+| Sample_1 | 0.32 | 0.48 |
+| Sample_2 | -0.12 | -0.28 |
+| Sample_3 | 0.14 | 0.31 |
+| Sample_4 | 1.21 | 1.03 |
+| Sample_5 | 0.43 | 0.33 |
+| Sample_6 | 0.03 | 0.91 |
+
+
+
+
diff --git a/inst/help_files/PCA_cite.Rmd b/inst/help_files/PCA_cite.Rmd
index 45d5f7f..d9e2272 100644
--- a/inst/help_files/PCA_cite.Rmd
+++ b/inst/help_files/PCA_cite.Rmd
@@ -4,3 +4,13 @@ output: html_document
date: "2024-08-29"
---
+- **BIGapp**
+
+- **BIGr**
+
+- **vcfR**
+ - Knaus BJ, Grünwald NJ (2017). *VCFR: a package to manipulate and visualize variant call format data in R.* Molecular Ecology Resources, 17(1), 44–53. ISSN 757. [doi:10.1111/1755-0998.12549](https://dx.doi.org/10.1111/1755-0998.12549).
+ - Knaus BJ, Grünwald NJ (2016). *VcfR: an R package to manipulate and visualize VCF format data.* BioRxiv. [doi:10.1101/041277](https://dx.doi.org/10.1101/041277).
+
+- **AGHmatrix**
+ - Amadeu R, Franco Garcia A, Munoz P, V Ferrao L (2023). *AGHmatrix: genetic relationship matrices in R.* Bioinformatics, 39(7).
diff --git a/inst/help_files/PCA_par.Rmd b/inst/help_files/PCA_par.Rmd
index 61a0f27..61f859d 100644
--- a/inst/help_files/PCA_par.Rmd
+++ b/inst/help_files/PCA_par.Rmd
@@ -4,3 +4,39 @@ output: html_document
date: "2024-08-29"
---
+
+* **VCF file**
+Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Passport file**
+A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID. Example:
+
+
+
+|Sample_ID | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width|Species |
+|:---------:|:------------:|:-----------:|:------------:|:-----------:|:-------:|
+|Sample_1 | 5.1| 3.5| 1.4| 0.2|versicolor |
+|Sample_2 | 4.9| 3.0| 1.4| 0.2|setosa |
+|Sample_3 | 4.7| 3.2| 1.3| 0.2|setosa |
+|Sample_4 | 4.6| 3.1| 1.5| 0.2|setosa |
+|Sample_5 | 5.0| 3.6| 1.4| 0.2|setosa |
+|Sample_6 | 5.4| 3.9| 1.7| 0.4|setosa |
+
+
+
+
+
+* **Species Ploidy**
+Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+* **Variable to color**
+Specifies which column to use in the passport data file to color the samples in the PCA plots by.
+
+* **Category to color**
+Specifies which specific item(s) within the selected column to color the samples in the PCA plots. All other samples will be a shade of grey.
+
+* **Color palette**
+Select which color palette to use for coloring the sample points. The different color palettes are separated by those that are color-blind friendly and those that are not.
+
+* **Axes to visualize**
+Choose which axes to display on the 2D PCA plot.
diff --git a/inst/help_files/PCA_res.Rmd b/inst/help_files/PCA_res.Rmd
index 1f2f534..54ac140 100644
--- a/inst/help_files/PCA_res.Rmd
+++ b/inst/help_files/PCA_res.Rmd
@@ -4,3 +4,20 @@ output: html_document
date: "2024-08-29"
---
+#### 3D PCA Plot
+
+- **Description**: The 3D PCA plot visualizes the first three principal components of genomic data, displayed in a three-dimensional space.
+- **Interpretation**: Each point represents an individual sample or genome, positioned according to its scores on the first three principal components. Clusters may indicate genetic similarities or groupings within the population.
+- **Use**: In the context of breeding decisions, this plot helps identify genetic diversity and potential outliers within a gene pool, facilitating the evaluation of genetic relationships between individuals. In broader population genetics, it aids in visualizing population structure and sub-structure.
+
+#### 2D PCA Plot
+
+- **Description**: The 2D PCA plot represents two principal components of the genomic data in a two-dimensional view.
+- **Interpretation**: Points on the plot correspond to individual genetic samples, with the axes signifying the most significant components capturing data variance. Patterns or grouping may suggest genetic linkage or divergence.
+- **Use**: For breeding applications, this plot offers a straightforward view to quickly assess genetic variance and clustering among candidate individuals. In population genetics, it helps in examining genetic differentiation or affinity among populations.
+
+#### Scree Plot
+
+- **Description**: The scree plot displays the eigenvalues of each principal component, in descending order, providing a visual summary of variance explained by each component.
+- **Interpretation**: The x-axis lists the principal components, while the y-axis shows associated eigenvalues. A significant drop in eigenvalues, or "elbow", indicates where additional components provide diminishing returns in explaining data variance.
+- **Use**: This plot assists in deciding the optimal number of principal components for analysis, striking a balance between data simplification and variance preservation. For breeding decisions, it informs which components capture most genetic variation, aiding in selection strategies. In population genetics, it helps determine the dimensional analysis required to understand genetic structure comprehensively.
diff --git a/inst/help_files/Predictive_Ability_cite.Rmd b/inst/help_files/Predictive_Ability_cite.Rmd
index 92d6654..2bdeda1 100644
--- a/inst/help_files/Predictive_Ability_cite.Rmd
+++ b/inst/help_files/Predictive_Ability_cite.Rmd
@@ -4,3 +4,18 @@ output: html_document
date: "2024-08-29"
---
+* **BIGapp**
+
+* **vcfR**
+
+Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549.
+
+Knaus BJ, Grünwald NJ (2016). “VcfR: an R package to manipulate and visualize VCF format data.” BioRxiv. https://dx.doi.org/10.1101/041277.
+
+* **rrBLUP**
+
+Endelman JB (2011). “Ridge regression and other kernels for genomic selection with R package rrBLUP.” Plant Genome, 4, 250-255.
+
+* **AGHmatrix**
+
+R Amadeu R, Franco Garcia A, Munoz P, V Ferrao L (2023). “AGHmatrix: genetic relationship matrices in R .” Bioinformatics, 39(7).
diff --git a/inst/help_files/Predictive_Ability_par.Rmd b/inst/help_files/Predictive_Ability_par.Rmd
index c3eab0f..ce189f2 100644
--- a/inst/help_files/Predictive_Ability_par.Rmd
+++ b/inst/help_files/Predictive_Ability_par.Rmd
@@ -4,3 +4,21 @@ output: html_document
date: "2024-08-29"
---
+This tab provides the predictive ability of a GBLUP model for each trait across all samples within a genomic dataset. The model is based on a 5-fold cross validation, where the samples are evenly grouped into 5 groups, and 4 of the groups are used to train the GBLUP model, while the trait is predicted for the 5th group. This continues until each group has had their trait information predicted, and the predictive ability is the pearson correlation between the known trait values and the predicted values. Each 5-fold cross-validation can be performed multiple times (iterations) to get a more confident estimate in the predictive ability of the model. This supports the use of genomic and pedigree information.
+
+
+* **VCF file**: Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Passport file**: A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID. Example:
+
+* **Species ploidy**: Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+* **Iterations**: This is the number of runs of five-fold cross-validation that you would like to perform to estimate predictive ability. The accuracy results are averaged over all iterations. The more iterations that are performed, the higher confidence in the final predictive ability estimates.
+
+* **Matrix type**: Specifies the matrix type to use for the GBLUP prediction model. The choices are:
+
+ * **Gmatrix**: An additive relationship matrix between all samples is created using the genotype information from the VCF file
+
+ * **Amatrix**: An additive relationship matrix between all samples is created using a user supplied pedigree file
+
+ * **Hmatrix**: An additive relationship matrix between all samples is created by using the information from both the VCF file and the pedigree file
diff --git a/inst/help_files/Predictive_Ability_res.Rmd b/inst/help_files/Predictive_Ability_res.Rmd
index 14e1952..67b01e7 100644
--- a/inst/help_files/Predictive_Ability_res.Rmd
+++ b/inst/help_files/Predictive_Ability_res.Rmd
@@ -4,3 +4,32 @@ output: html_document
date: "2024-08-29"
---
+#### Violin Plot
+
+- **Description**: The violin plot showcases the distribution of Pearson correlation coefficients between predicted and known phenotype values for each group in a five-fold cross-validation, repeated across the user selected number of iterations.
+- **Interpretation**: The shape and width of the violins illustrate the distribution and density of correlation values, while individual points represent specific correlation outcomes for each cross-validation split (total points = 5*# of iters).
+- **Use**: This plot is vital for assessing the variability and robustness of predictive models in genomic selection, highlighting how well genotype data can predict phenotypes.
+
+#### Box Plot
+
+- **Description**: The box plot is similar to the violin plot, summarizing the Pearson correlation coefficients without displaying individual data points.
+- **Interpretation**: The box plot shows the median, interquartile range, and potential outliers of the correlation values, providing a concise view of prediction accuracy across all cross-validation folds.
+- **Use**: This visualization is useful for quickly comparing the central tendency and variability of predictive performance across different traits or models, facilitating quick assessments of model reliability and effectiveness in genomic selection.
+
+#### Predictive Ability Table
+
+- **Description**: The predictive ability table summarizes the Pearson correlations (Predictive Ability) for each iteration, organized by specific traits of interest, such as Sepal Length and Sepal Width.
+- **Interpretation**: Each row represents a different iteration, with columns providing correlation values for each phenotypic trait. Higher values indicate better predictive performance of the genomic selection model.
+- **Use**: This table serves as a detailed summary of predictive ability across iterations, allowing researchers to evaluate model performance over repeated trials. It is crucial for determining model reliability and can be used to generate custom figures.
+
+- **Example**:
+
+
+
+|Iter | Sepal.Length | Sepal.Width |
+|:---------:|:------------:|:-----------:|
+|1 | 0.728| 0.571|
+|2 | 0.721| 0.568|
+|3 | 0.724| 0.543|
+
+
diff --git a/inst/help_files/Updog_Dosage_Calling_cite.Rmd b/inst/help_files/Updog_Dosage_Calling_cite.Rmd
index 21e35d4..e7ca342 100644
--- a/inst/help_files/Updog_Dosage_Calling_cite.Rmd
+++ b/inst/help_files/Updog_Dosage_Calling_cite.Rmd
@@ -4,13 +4,14 @@ output: html_document
date: "2024-08-29"
---
-* **BIGapp**
-
-
-* **Updog package**
-
-Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). Genotyping Polyploids from Messy Sequencing Data. Genetics, 210(3), 789-807. doi: 10.1534/genetics.118.301468.
-
-If you used the “norm” model cite also:
-
-Gerard D, Ferrão L (2020). “Priors for Genotyping Polyploids.” Bioinformatics, 36(6), 1795-1800. ISSN 1367-4803, doi: 10.1093/bioinformatics/btz852.
+- **BIGapp**
+
+- **BIGr**
+
+- **Updog Package**
+
+ Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). *Genotyping Polyploids from Messy Sequencing Data*. Genetics, 210(3), 789-807. [doi: 10.1534/genetics.118.301468](https://doi.org/10.1534/genetics.118.301468).
+
+ If you used the “norm” model, cite also:
+
+ Gerard D, Ferrão L (2020). *Priors for Genotyping Polyploids*. Bioinformatics, 36(6), 1795-1800. ISSN 1367-4803, [doi: 10.1093/bioinformatics/btz852](https://doi.org/10.1093/bioinformatics/btz852).
diff --git a/inst/help_files/Updog_Dosage_Calling_par.Rmd b/inst/help_files/Updog_Dosage_Calling_par.Rmd
index 40305c8..f80e6ec 100644
--- a/inst/help_files/Updog_Dosage_Calling_par.Rmd
+++ b/inst/help_files/Updog_Dosage_Calling_par.Rmd
@@ -11,7 +11,8 @@ date: "2024-08-29"
* **VCF file**:
Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
-* **Passport File**: A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID.
+* **Passport File**:
+A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID.
* **Select Category Subset**: After loading the passport file, this option will be available. You can select the column name to base the subsetting for the samples
@@ -45,6 +46,7 @@ The following information is from the Updog manual. Possible values of the genot
`f1` This prior assumes the individuals are all full-siblings resulting from one generation of a bi-parental cross. This model assumes a particular type of meiotic behavior: polysomic inheritance with bivalent, non-preferential pairing. `f1pp` This prior allows for double reduction and preferential pairing in an F1 population of tretraploids. `s1pp` This prior allows for double reduction and preferential pairing in an S1 population of tretraploids. `flex` Generically any categorical distribution. Theoretically, this works well if you have a lot of individuals. In practice, it seems to be much less robust to violations in modeling assumptions.`uniform` A discrete uniform distribution. This should never be used in practice."
* **Parent**: If “s1” or “s1pp” model is selected you must define which sample is correspondent to the parent including the sample ID in this box. The input sample ID must match to the sample ID in the input genotype file
+
* **Parent1 and Parent2**: if “f1” or “f1pp” model is selected you must define which samples correspondent to the parent1 and parent2 including the samples ID in the respective boxes. The input sample ID must match to the sample ID in the input genotype file
* **Number of CPU Cores**: Number of cores to be used in the multidog function paralelization
diff --git a/inst/help_files/VCF_Filtering_cite.Rmd b/inst/help_files/VCF_Filtering_cite.Rmd
index 0699277..390e900 100644
--- a/inst/help_files/VCF_Filtering_cite.Rmd
+++ b/inst/help_files/VCF_Filtering_cite.Rmd
@@ -4,3 +4,14 @@ output: html_document
date: "2024-08-29"
---
+
+- **BIGapp**
+
+- **BIGr**
+
+- **Updog** (if filtering parameters used)
+ Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). *Genotyping Polyploids from Messy Sequencing Data*. Genetics, 210(3), 789-807. [doi: 10.1534/genetics.118.301468](https://doi.org/10.1534/genetics.118.301468).
+
+- **vcfR**
+ - Knaus BJ, Grünwald NJ (2017). *vcfR: a package to manipulate and visualize variant call format data in R.* Molecular Ecology Resources, 17(1), 44–53. ISSN 757, [doi:10.1111/1755-0998.12549](https://dx.doi.org/10.1111/1755-0998.12549).
+ - Knaus BJ, Grünwald NJ (2016). *VcfR: an R package to manipulate and visualize VCF format data.* BioRxiv. [doi:10.1101/041277](https://dx.doi.org/10.1101/041277).
diff --git a/inst/help_files/VCF_Filtering_par.Rmd b/inst/help_files/VCF_Filtering_par.Rmd
index 2abbd30..d845367 100644
--- a/inst/help_files/VCF_Filtering_par.Rmd
+++ b/inst/help_files/VCF_Filtering_par.Rmd
@@ -3,3 +3,23 @@ title: "VCF_Filtering_par"
output: html_document
date: "2024-08-29"
---
+
+
+* **VCF file**: Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
+
+* **Species Ploidy**: Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+* **Minor-Allele-Frequency**: The frequency of the minor allele within the population for each SNP. SNPs with a very low MAF (MAF < 0.01) are typically removed since they could be due to sequencing errors and could bias the GWAS and PCA analyses.
+
+* **Read Depth per marker/sample**: This the read depth for each marker at each sample. A low read depth suggests that a given marker at a sample had poor genotyping performance, and should be assigned as missing. Typical read depth thresholds are set so that genotypes with a read depth per marker/sample of less than 10 are assigned as missing data.
+
+* **SNP missing data**: The ratio of missing data across all samples for each SNP. Low missing data (minimum <= 50%) is necessary to not bias and have confidence in the downstream results.
+
+* **Sample missing data**: The ratio of missing data across all SNPs for each sample. Low missing data (minimum <= 50%) is necessary to not bias and have confidence in the downstream results.
+
+* **Updog parameters**:
+
+ * **OD**: The estimated overdispersion parameter of the SNP from updog
+ * **Bias**: The estimated allele bias of the SNP from updog
+ * **Prop_mis**: The estimated proportion of individuals misclassified in the SNP from updog
+ * **Maxpostprob**: Maximum posterior probability for that dosage call from updog
diff --git a/inst/help_files/VCF_Filtering_res.Rmd b/inst/help_files/VCF_Filtering_res.Rmd
index e3430b6..8243c09 100644
--- a/inst/help_files/VCF_Filtering_res.Rmd
+++ b/inst/help_files/VCF_Filtering_res.Rmd
@@ -4,3 +4,6 @@ output: html_document
date: "2024-08-29"
---
+* **VCF file (v4.3)**
+
+Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf.
diff --git a/tests/testthat/test-DosageCall.R b/tests/testthat/test-DosageCall.R
index b49b881..a3ac1d0 100644
--- a/tests/testthat/test-DosageCall.R
+++ b/tests/testthat/test-DosageCall.R
@@ -6,6 +6,7 @@ context("Dosage Calling")
test_that("Dosage Calling from MADC file",{
madc_file <- system.file("iris_DArT_MADC.csv", package="BIGapp")
+
output_name <- "output"
ploidy <- 2
cores <- 2
diff --git a/tests/testthat/test-GWAS.R b/tests/testthat/test-GWAS.R
index a2d9420..d56dd36 100644
--- a/tests/testthat/test-GWAS.R
+++ b/tests/testthat/test-GWAS.R
@@ -82,6 +82,22 @@ test_that("test GWAS",{
stop("Wrong file format")
}
+ LD_plot <- LD.plot(data)
+
+ lim.d <- max(LD_plot$data$d)
+
+ input$bp_window <- 5e6
+
+ if(input$bp_window > lim.d) {
+ safeError("Base pair window larger than maximum distance. Reduce window size.")
+ p <- LD_plot
+ } else {
+ p <- LD_plot + geom_vline(aes(xintercept=input$bp_window, color = "bp window"),linetype="dashed") +
+ theme(legend.title=element_blank(), legend.position="top")
+ }
+
+ print(p)
+
data.loco <- set.K(data,LOCO=F,n.core= as.numeric(cores))
#Delete temp pheno file
@@ -170,6 +186,18 @@ test_that("test GWAS",{
qtl <- get.QTL(data=data2,traits=colnames(data@pheno[i]),bp.window=5e6)
qtl_d <- data.frame(qtl)
+ if(length(qtl$Model) >0){
+ rm.qtl <- which(qtl$Model %in% c("diplo-general", "diplo-additive"))
+ if(length(rm.qtl) > 0){
+ warning("QTL detected by the models diplo-general and diplo-additive are not supported in the fit.QTL current version")
+ qtl <- qtl[-rm.qtl,]
+ }
+
+ fit.ans_temp <- fit.QTL(data=data2,
+ trait=input$trait_info,
+ qtl=qtl[,c("Marker","Model")])
+ }
+
#get qqplot
data_qq <- cbind.data.frame(SNP=data.loco.scan@map$Marker,Chr=data.loco.scan@map$Chrom, Pos=data.loco.scan@map$Position,10^(-data.loco.scan@scores[[colnames(data@pheno[i])]]))
@@ -195,3 +223,4 @@ test_that("test GWAS",{
}
})
+
diff --git a/tests/testthat/test-PCA.R b/tests/testthat/test-PCA.R
index 1d2aeff..1deda9a 100644
--- a/tests/testthat/test-PCA.R
+++ b/tests/testthat/test-PCA.R
@@ -67,6 +67,11 @@ test_that("test PCA",{
# Print the modified dataframe
row.names(info_df) <- info_df[,1]
+ # Check ploidy
+ if(input$pca_ploidy != max(genomat)){
+ stop("Wrong ploidy")
+ }
+
#Plotting
#First build a relationship matrix using the genotype values
G.mat.updog <- AGHmatrix::Gmatrix(t(genomat), method = "VanRaden", ploidy = as.numeric(ploidy), missingValue = "NA")
diff --git a/tests/testthat/test-diversity.R b/tests/testthat/test-diversity.R
index 82b7444..ce164f9 100644
--- a/tests/testthat/test-diversity.R
+++ b/tests/testthat/test-diversity.R
@@ -70,7 +70,7 @@ test_that("test diversity",{
axis.title = element_text(size = 14)
)
- hist(diversity_items$het_df$ObservedHeterozygosity, breaks = as.numeric(input$hist_bins), col = "tan3", border = "black", xlim= c(0,1),
+ hist(diversity_items$het_df$Ho, breaks = as.numeric(input$hist_bins), col = "tan3", border = "black", xlim= c(0,1),
xlab = "Observed Heterozygosity",
ylab = "Number of Samples",
main = "Sample Observed Heterozygosity")