From 9813e52c6db98ec0f1ad155e49a9d086cb3b5528 Mon Sep 17 00:00:00 2001 From: alex-sandercock Date: Fri, 20 Dec 2024 16:04:06 -0500 Subject: [PATCH] GS and UI updates --- R/GS_functions.R | 4 +- R/app_ui.R | 2 +- R/mod_Filtering.R | 4 +- R/mod_GS.R | 475 ++++++++++++++++++++-------------------------- R/mod_GSAcc.R | 11 +- R/mod_help.R | 4 +- 6 files changed, 224 insertions(+), 276 deletions(-) diff --git a/R/GS_functions.R b/R/GS_functions.R index 03b5e4c..79a98da 100644 --- a/R/GS_functions.R +++ b/R/GS_functions.R @@ -311,9 +311,9 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits, results[(((r-1)*5)+fold), (length(traits)+2)] <- fold # Extract GEBVs - GEBVs_fold[, trait_idx] <- traitpred$g[test] #Confirm it is accuract to calculate the GEBVs for testing group from the trained model + GEBVs_fold[, trait_idx] <- traitpred$g[test] - # Calculate heritability (these are wrong) + # Calculate heritability (*confirm this calculation* - either way will not report to user) Vu <- traitpred$Vg Ve <- traitpred$Ve heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve) diff --git a/R/app_ui.R b/R/app_ui.R index 37f0d29..c9b058b 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -57,7 +57,7 @@ app_ui <- function(request) { menuItem("Home", tabName = "welcome", icon = icon("house"),startExpanded = FALSE), tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Genotype Processing"), menuItem("Convert to VCF", tabName = "dosage2vcf", icon = icon("share-from-square")), - menuItem("Updog Dosage Calling", tabName = "updog", icon = icon("list-ol")), + menuItem("Dosage Calling", tabName = "updog", icon = icon("list-ol")), menuItem("VCF Filtering", tabName = "filtering", icon = icon("filter")), tags$li(class = "header", style = "color: grey; margin-top: 18px; margin-bottom: 10px; padding-left: 15px;", "Summary Metrics"), menuItem("Genomic Diversity", tabName = "diversity", icon = icon("chart-pie")), diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R index 76a8be1..2c8308a 100644 --- a/R/mod_Filtering.R +++ b/R/mod_Filtering.R @@ -40,9 +40,9 @@ mod_Filtering_ui <- function(id){ numericInput(ns("maxpostprob_filter"), "Minimum maxpostprob (Updog filter)", min = 0, value = 0.5, step = 0.1) ) ), - actionButton(ns("run_filters"), "Apply filters"), + actionButton(ns("run_filters"), "Apply Filters"), useShinyjs(), - downloadButton(ns("start_updog_filter"), "Download Filtered VCF", icon = icon("download"), class = "butt"), + downloadButton(ns("start_updog_filter"), "Download", icon = icon("download"), class = "butt"), div(style="display:inline-block; float:right",dropdownButton( tags$h3("Updog Filter Parameters"), "You can download examples of the expected file here: \n", diff --git a/R/mod_GS.R b/R/mod_GS.R index f59a45d..933bb70 100644 --- a/R/mod_GS.R +++ b/R/mod_GS.R @@ -20,13 +20,13 @@ mod_GS_ui <- function(id){ fluidRow( column(width = 3, box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE, - fileInput(ns("pred_known_file"), "Choose Training VCF File", accept = c(".csv",".vcf",".gz")), - fileInput(ns("pred_trait_file"), "Choose Trait File", accept = ".csv"), - fileInput(ns("pred_est_file"), "Choose Prediction VCF File", accept = c(".csv",".vcf",".gz")), - numericInput(ns("pred_est_ploidy"), "Species Ploidy", min = 1, value = NULL), + "* Required", + fileInput(ns("pred_known_file"), "Choose VCF File*", accept = c(".csv",".vcf",".gz")), + fileInput(ns("pred_trait_file"), "Choose Trait File*", accept = ".csv"), + numericInput(ns("pred_est_ploidy"), "Species Ploidy*", min = 1, value = NULL), virtualSelectInput( inputId = ns("pred_trait_info2"), - label = "Select Trait (eg, Color):", + label = "Select Trait*", choices = NULL, showValueAsTags = TRUE, search = TRUE, @@ -34,12 +34,26 @@ mod_GS_ui <- function(id){ ), virtualSelectInput( inputId = ns("pred_fixed_info2"), - label = "Select Fixed Effects (optional) (not validated):", + label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")), choices = NULL, showValueAsTags = TRUE, search = TRUE, multiple = TRUE ), + conditionalPanel( + condition = "input.pred_fixed_info2.length > 0", ns = ns, + div( + "(unselected will be considered covariates)", + virtualSelectInput( + inputId = ns("pred_fixed_cat2"), + label = "Select Categorical Fixed Effects", + choices = NULL, + showValueAsTags = TRUE, + search = TRUE, + multiple = TRUE + ) + ) + ), actionButton(ns("prediction_est_start"), "Run Analysis"), div(style="display:inline-block; float:right",dropdownButton( tags$h3("GP Parameters"), @@ -67,8 +81,8 @@ mod_GS_ui <- function(id){ column(width = 6, box(title = "Results", status = "info", solidHeader = FALSE, width = 12, height = 600, maximizable = T, bs4Dash::tabsetPanel( - tabPanel("Predicted Trait Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"), - tabPanel("GEBVs Table", DTOutput(ns("pred_gebvs_table2")),style = "overflow-y: auto; height: 500px") + tabPanel("Predicted Pheno Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"), + tabPanel("EBVs Table", DTOutput(ns("pred_gebvs_table2")),style = "overflow-y: auto; height: 500px") ) ) @@ -77,7 +91,7 @@ mod_GS_ui <- function(id){ column(width = 3, valueBoxOutput("shared_snps", width = NULL), box(title = "Status", width = 12, collapsible = TRUE, status = "info", - progressBar(id = "pb_gp", value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") + progressBar(id = ns("pb_gp"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") ), box(title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE, width = 12, div(style="display:inline-block; float:left",dropdownButton( @@ -117,6 +131,7 @@ mod_GS_server <- function(input, output, session, parent_session){ advanced_options_pred <- reactiveValues( pred_model = "GBLUP", pred_matrix = "Gmatrix", + pred_est_file = NULL, ped_file = NULL ) @@ -134,13 +149,21 @@ mod_GS_server <- function(input, output, session, parent_session){ } }) + output$uploaded_file_name_pred <- renderText({ + if (!is.null(advanced_options_pred$pred_est_file)) { + paste("Previously uploaded file:", advanced_options_pred$pred_est_file$name) + } else { + "" # Return an empty string if no file has been uploaded + } + }) + #UI popup window for input observeEvent(input$advanced_options_pred, { showModal(modalDialog( title = "Advanced Options (beta)", selectInput( inputId = ns('pred_model'), - label = 'Model Choice', + label = 'Method Choice', choices = c("GBLUP"), selected = advanced_options_pred$pred_model # Initialize with stored value ), @@ -149,12 +172,23 @@ mod_GS_server <- function(input, output, session, parent_session){ div( selectInput( inputId = ns('pred_matrix'), - label = 'GBLUP Matrix Choice', - choices = c("Gmatrix", "Amatrix", "Hmatrix"), + label = 'Relationship Matrix Choice', + #choices = c("Gmatrix", "Amatrix", "Hmatrix"), + choices = c("Gmatrix"), selected = advanced_options_pred$pred_matrix # Initialize with stored value ) ) ), + conditionalPanel( + condition = "input.pred_matrix != 'Amatrix'", ns = ns, + div( + fileInput(ns("pred_est_file"), "Choose Prediction VCF", accept = c(".vcf",".gz")), + conditionalPanel( + condition = "output.uploaded_file_name_pred !== ''", # Show only if there's content + textOutput(ns("uploaded_file_name_pred")) # Display the uploaded file name + ) + ) + ), conditionalPanel( condition = "input.pred_matrix != 'Gmatrix'", ns = ns, div( @@ -167,7 +201,7 @@ mod_GS_server <- function(input, output, session, parent_session){ ), footer = tagList( modalButton("Close"), - actionButton(ns("save_advanced_options"), "Save") + actionButton(ns("save_advanced_options_pred"), "Save") ) )) }) @@ -175,10 +209,11 @@ mod_GS_server <- function(input, output, session, parent_session){ #Close popup window when user "saves options" - observeEvent(input$save_advanced_options, { - advanced_options$pred_model <- input$pred_model - advanced_options$pred_matrix <- input$pred_matrix - advanced_options$ped_file <- input$ped_file + observeEvent(input$save_advanced_options_pred, { + advanced_options_pred$pred_model <- input$pred_model + advanced_options_pred$pred_matrix <- input$pred_matrix + advanced_options_pred$pred_est_file <- input$pred_est_file + advanced_options_pred$ped_file <- input$ped_file # Save other inputs as needed removeModal() # Close the modal after saving @@ -196,12 +231,12 @@ mod_GS_server <- function(input, output, session, parent_session){ info_df2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrow = 0) trait_var2 <- colnames(info_df2) trait_var2 <- trait_var2[2:length(trait_var2)] - #updateSelectInput(session, "pred_trait_info", choices = c("All", trait_var)) updateVirtualSelect("pred_fixed_info2", choices = trait_var2, session = session) updateVirtualSelect("pred_trait_info2", choices = trait_var2, session = session) - - #output$passport_table <- renderDT({info_df}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 4) - #) + }) + + observeEvent(input$pred_fixed_info2, { + updateVirtualSelect("pred_fixed_cat2", choices = input$pred_fixed_info2, session = session) }) #2) Error check for prediction and save input files @@ -212,7 +247,8 @@ mod_GS_server <- function(input, output, session, parent_session){ est_geno_input = NULL, shared_snps = NULL, pred_genos = NULL, - pred_geno_pheno = NULL + pred_geno_pheno = NULL, + matrix = NULL ) pred_outputs2 <- reactiveValues( @@ -241,7 +277,7 @@ mod_GS_server <- function(input, output, session, parent_session){ toggleClass(id = "pred_est_ploidy", class = "borderred", condition = (is.na(input$pred_est_ploidy) | is.null(input$pred_est_ploidy))) - if (is.null(input$pred_known_file$datapath) | is.null(input$pred_est_file$datapath) | is.null(input$pred_trait_file$datapath)) { + if (is.null(input$pred_known_file$datapath) | is.null(input$pred_trait_file$datapath)) { shinyalert( title = "Missing input!", text = "Upload VCF and phenotype files", @@ -257,7 +293,7 @@ mod_GS_server <- function(input, output, session, parent_session){ animation = TRUE ) } - req(input$pred_known_file$datapath, input$pred_est_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy) + req(input$pred_known_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy) #Status updateProgressBar(session = session, id = "pb_gp", value = 5, title = "Checking input files") @@ -278,7 +314,7 @@ mod_GS_server <- function(input, output, session, parent_session){ # If condition is met, show notification toast shinyalert( - title = "Oops", + title = "Missing input!", text = "No traits were selected", size = "xs", closeOnEsc = TRUE, @@ -330,19 +366,27 @@ mod_GS_server <- function(input, output, session, parent_session){ } #Convert VCF file if submitted - train_vcf <- vcfR::read.vcfR(train_geno_path) - est_vcf <- vcfR::read.vcfR(est_geno_path) + train_vcf <- vcfR::read.vcfR(train_geno_path, verbose = FALSE) + if (is.null(est_geno_path) || is.na(est_geno_path)){ + est_vcf <- NULL + } else { + est_vcf <- vcfR::read.vcfR(est_geno_path, verbose = FALSE) + } #Get number of SNPs #pred_inputs$pred_snps <- nrow(vcf) #Extract GT + if (is.null(est_vcf)) { + est_geno <- NULL + } else { + est_geno <- extract.gt(est_vcf, element = "GT") + est_geno <- apply(est_geno, 2, convert_to_dosage) + class(est_geno) <- "numeric" + } train_geno <- extract.gt(train_vcf, element = "GT") train_geno <- apply(train_geno, 2, convert_to_dosage) - est_geno <- extract.gt(est_vcf, element = "GT") - est_geno <- apply(est_geno, 2, convert_to_dosage) class(train_geno) <- "numeric" - class(est_geno) <- "numeric" rm(train_vcf) rm(est_vcf) @@ -350,7 +394,7 @@ mod_GS_server <- function(input, output, session, parent_session){ # If condition is met, show notification toast shinyalert( - title = "Oops", + title = "File Error", text = "No valid genotype file detected", size = "xs", closeOnEsc = TRUE, @@ -369,9 +413,6 @@ mod_GS_server <- function(input, output, session, parent_session){ return() } - #Save number of samples in file - #pred_inputs$pred_genos <- ncol(geno) - #Check that the ploidy entered is correct if (ploidy != max(train_geno, na.rm = TRUE)) { # If condition is met, show notification toast @@ -407,7 +448,11 @@ mod_GS_server <- function(input, output, session, parent_session){ #tranforming genotypes train_geno_adj_init <- convert_genotype(train_geno, as.numeric(ploidy)) - est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy)) + if (is.null(est_geno)) { + est_geno_adj_init <- NULL + } else { + est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy)) + } #Make sure the trait file and genotype file are in the same order # Column names for geno (assuming these are the individual IDs) @@ -424,8 +469,8 @@ mod_GS_server <- function(input, output, session, parent_session){ # If condition is met, show notification toast shinyalert( - title = "Oops", - text = "All samples were missing from the phenotype file", + title = "Data Mismatch", + text = "All genotyped samples were missing from the phenotype file", size = "xs", closeOnEsc = TRUE, closeOnClickOutside = FALSE, @@ -446,8 +491,8 @@ mod_GS_server <- function(input, output, session, parent_session){ } else if (length(common_ids) < length(colnames_geno)) { # If condition is met, show notification toast shinyalert( - title = "Data Mismatch", - text = paste0((length(colnames_geno)-length(common_ids))," samples were removed for not having trait information"), + title = "Data Check", + text = paste0((length(common_ids))," samples in VCF File have trait information"), size = "xs", closeOnEsc = FALSE, closeOnClickOutside = FALSE, @@ -498,32 +543,45 @@ mod_GS_server <- function(input, output, session, parent_session){ } } ) - - - # Subset and reorder geno and pheno to ensure they only contain and are ordered by common IDs - train_geno_adj <- train_geno_adj_init[, common_ids] # Assuming that the columns can be directly indexed by IDs - pheno2 <- pheno2[match(common_ids, ids_pheno), ] - - #Save to reactive values - pred_inputs2$pheno_input <- pheno2 - #pred_inputs$geno_adj_input <- geno_adj - - #Match training and testing genotype file SNPs - common_markers <- intersect(rownames(train_geno_adj), rownames(est_geno_adj_init)) - train_geno_adj <- train_geno_adj[common_markers, ] - est_geno_adj_init <- est_geno_adj_init[common_markers, ] + + #Status + updateProgressBar(session = session, id = "pb_gp", value = 40, title = "Generating Matrices") + + #Create relationship matrix depending on the input VCF files + if (is.null(advanced_options_pred$pred_est_file)) { + #Subset phenotype file by common IDs + pheno2 <- pheno2[common_ids, ] + + #Matrix + Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + + } else{ + #Subset phenotype file and training file by common IDs + pheno2 <- pheno2[common_ids, ] + + #Match training and testing genotype file SNPs + common_markers <- intersect(rownames(train_geno_adj_init), rownames(est_geno_adj_init)) + #first remove samples from training genotype matrix that are not in the phenotype file + train_geno_adj <- train_geno_adj_init[common_markers, common_ids] + #Merge the training and prediction genotype matrices + est_geno_adj_init <- est_geno_adj_init[common_markers, ] + train_pred_geno <- cbind(train_geno_adj, est_geno_adj_init) + + #Matrix + Kin_mat <- A.mat(t(train_pred_geno), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + + } #Save to reactive values pred_inputs2$shared_snps <- length(common_markers) - pred_inputs2$train_geno_input <- train_geno_adj - pred_inputs2$est_geno_input <- est_geno_adj_init - + pred_inputs2$matrix <- Kin_mat + pred_inputs2$pheno_input <- pheno2 }) #3) Analysis only proceeds once continue_prediction is converted to TRUE observe({ - req(continue_prediction2(),pred_inputs2$pheno_input, pred_inputs2$train_geno_input) + req(continue_prediction2(),pred_inputs2$pheno_input, pred_inputs2$matrix) # Stop analysis if cancel was selected if (isFALSE(continue_prediction2())) { @@ -532,220 +590,80 @@ mod_GS_server <- function(input, output, session, parent_session){ #Variables ploidy <- as.numeric(input$pred_est_ploidy) - train_geno_adj <- pred_inputs2$train_geno_input - est_geno_adj <- pred_inputs2$est_geno_input - pheno <- pred_inputs2$pheno_input + gmat <- pred_inputs2$matrix + pheno2 <- pred_inputs2$pheno_input traits <- input$pred_trait_info2 - #CVs <- as.numeric(input$pred_cv) - #train_perc <- as.numeric(input$pred_folds) - fixed_traits <- input$pred_fixed_info2 - cores <- input$pred_cores - - ##Need to add ability for the use of parallelism for the for cross-validation - ##Example at this tutorial also: https://www.youtube.com/watch?v=ARWjdQU6ays - - # Function to perform genomic prediction - ##Make sure this is correct (I think I need to be generating a relationship matrix A.mat() to account for missing data, but I am not sure how that works with this) - genomic_prediction2 <- function(train_geno,est_geno, Pheno, traits, fixed_effects = NULL, cores = 1) { - - # Define variables - traits <- traits - #cycles <- as.numeric(Iters) - #Folds <- as.numeric(Fold) - total_population <- ncol(train_geno) - #train_size <- floor(percentage / 100 * total_population) - fixed_traits <- fixed_effects - cores <- as.numeric(cores) - - # Initialize a list to store GEBVs for all traits and cycles - GEBVs <- list() - - #Cross validation number for progress bar (not involved in the calculations, just shiny visuals) - pb_value = 10 - - #Remove the fixed traits from the Pheno file - if (length(fixed_traits) == 0) { - Pheno <- Pheno - } else { - #Subset fixed traits - Fixed <- subset(Pheno, select = fixed_traits) - - #Pheno <- subset(Pheno, select = -fixed_traits) - convert_all_to_factor_if_not_numeric <- function(df) { - for (col in names(df)) { - if (!is.numeric(df[[col]]) && !is.integer(df[[col]])) { - df[[col]] <- as.factor(df[[col]]) - } - } - return(df) - } - # Convert all columns to factor if they are not numeric or integer - Fixed <- convert_all_to_factor_if_not_numeric(Fixed) - - #Fixed <- as.data.frame(lapply(Fixed, as.factor)) #convert to factor - row.names(Fixed) <- row.names(Pheno) - - #Make the matrix - formula_str <- paste("~", paste(fixed_traits, collapse = " + ")) - formula <- as.formula(formula_str) - - # Create the design matrix using the constructed formula - Fixed <- model.matrix(formula, data = Fixed) - } - - #Make kinship matrix of all individuals? - #Kin_mat <- A.mat(t(geno), n.core = 1) ##Need to explore whether or not to use a Kinship matrix and if it makes a significant improvement to accuracy - #If wanting to use Kkinship matrix, will then need to see how to implement it here - - #For now, I am just imputing the missing sites using mean, but EM is more accurate, but slower (can use multiple cores). - impute = (A.mat(t(train_geno), max.missing=1,impute.method="mean",return.imputed=TRUE)) - train_geno <- impute$imputed - impute = (A.mat(t(est_geno), max.missing=1,impute.method="mean",return.imputed=TRUE)) - est_geno <- impute$imputed - - #Match training and testing genotype file SNPs - common_markers <- intersect(colnames(train_geno), colnames(est_geno)) - train_geno <- train_geno[ ,common_markers] - est_geno <- est_geno[ ,common_markers] - - #Calculate predicted traits and GEBVs - #fold_ids <- sample(rep(1:Folds, length.out = total_population)) - #fold_df <- data.frame(Sample = row.names(geno), FoldID = fold_ids) #Randomly assign each sample to a fold - #fold_results <- matrix(nrow = Folds, ncol = length(traits)) - #colnames(fold_results) <- traits - - #Status - updateProgressBar(session = session, id = "pb_gp", value = 50, title = "Estimating Predicted Values") - - train <- row.names(train_geno) - - #Subset datasets - #if (length(fixed_traits) == 0) { - # Fixed_train = NULL - #} else{ - # Fixed_train <- data.frame(Fixed[train, ]) - # Fixed_train <- as.matrix(Fixed_train) - # row.names(Fixed_train) <- train - #colnames(Fixed_train) <- colnames(Fixed) - Fixed_train = NULL - - #Fixed (testing) - # Fixed_test<- data.frame(Fixed[test, ]) - # Fixed_test <- as.matrix(Fixed_test) - # row.names(Fixed_test) <- test - #colnames(Fixed_test) <- colnames(Fixed) - - Pheno_train <- Pheno[train, ] # Subset the phenotype df to only retain the relevant samples from the training set - m_train <- train_geno - #Pheno_test <- Pheno[test, ] - #Fixed_test <- Fixed[test, ] #Where would the Fixed_test be used? - m_valid <- est_geno - - print(dim(m_train)) - print(dim(m_valid)) - - # Initialize a matrix to store GEBVs for this fold - GEBVs_fold <- matrix(nrow = nrow(est_geno), ncol = length(traits)) - colnames(GEBVs_fold) <- c(traits) - rownames(GEBVs_fold) <- row.names(est_geno) - - Pred_results <- matrix(nrow = nrow(est_geno), ncol = length(traits)) - colnames(Pred_results) <- c(traits) - rownames(Pred_results) <- row.names(est_geno) - - #Evaluate each trait using the same train and testing samples for each - for (trait_idx in 1:length(traits)) { - trait <- Pheno_train[, traits[trait_idx]] # Get the trait of interest - trait_answer <- mixed.solve(y= trait, Z = m_train, K = NULL, X = Fixed_train, SE = FALSE, return.Hinv = FALSE) - TRT <- trait_answer$u - e <- as.matrix(TRT) - pred_trait_test <- m_valid %*% e - pred_trait <- pred_trait_test[, 1] + c(trait_answer$beta) # Make sure this still works when using multiple traits - Pred_results[, trait_idx] <- pred_trait #save to dataframe - - # Extract GEBVs - # Check if Fixed_train is not NULL and include beta if it is - if (!is.null(Fixed_train) && !is.null(trait_answer$beta)) { - # Calculate GEBVs including fixed effects - #GEBVs_fold[, trait_idx] <- m_train %*% trait_answer$u + Fixed_train %*% matrix(trait_answer$beta, nrow = length(trait_answer$beta), ncol = 1) - #GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u + Fixed_test %*% matrix(trait_answer$beta, nrow = length(trait_answer$beta), ncol = 1) - GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u + Fixed_test %*% trait_answer$beta - } else { - # Calculate GEBVs without fixed effects - #GEBVs_fold[, trait_idx] <- m_train %*% trait_answer$u - GEBVs_fold[, trait_idx] <- m_valid %*% trait_answer$u #Confirm it is accuract to calculate the GEBVs for testing group from the trained model - } - - # Calculate heritability for the current trait - #Vu <- trait_answer$Vu - #Ve <- trait_answer$Ve - #heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve) - - } - #Add iter and fold information for each trait/result - #heritability_scores[(((r-1)*5)+fold), (length(traits)+1)] <- r - #heritability_scores[(((r-1)*5)+fold), (length(traits)+2)] <- fold - - #Add sample, iteration, and fold information to GEBVs_fold - #GEBVs_fold[,"Iter"] = r - #GEBVs_fold[,"Fold"] = fold - #GEBVs_fold[,"Sample"] <- row.names(est_geno) - - # Store GEBVs for this fold - GEBVs_df <- data.frame(GEBVs_fold) - - Pred_results <- data.frame(Pred_results) - - - # Store GEBVs for this cycle - #GEBVs[[r]] <- do.call(rbind, GEBVs_cycle) - - - # Combine all GEBVs into a single DataFrame - #GEBVs_df <- as.data.frame(do.call(rbind, GEBVs)) - - #results <- as.data.frame(results) - #heritability_scores <- as.data.frame(heritability_scores) - - # Combine results and heritability_scores using cbind - #combined_results <- cbind(results, heritability_scores) - - return(list(GEBVs = GEBVs_df, Predictions = Pred_results)) + # Assign fixed_cat based on input$pred_fixed_cat2 + if (!is.null(input$pred_fixed_cat2) && length(input$pred_fixed_cat2) > 0) { + fixed_cat <- input$pred_fixed_cat2 + } else { + fixed_cat <- NULL } - - # Example call to the function - #This is slow when using 3k markers and 1.2k samples...will need to parallelize if using this script... - results <- genomic_prediction2(train_geno_adj, est_geno_adj, pheno, traits = traits, fixed_effects = fixed_traits, cores = cores) - - #With fixed effects (need to inforporate the ability for fixed effects into the prediction?) - #results <- genomic_prediction(geno_matrix, phenotype_df, c("height", "weight"), "~ age + sex") - - #Save to reactive value - pred_outputs2$trait_output <- results$Predictions - pred_outputs2$all_GEBVs <- results$GEBVs - #TESTING!!! - #write.csv(results$GEBVs, "GEBVs_test.csv") - - # Convert trait columns to numeric - results$GEBVs <- results$GEBVs %>% - mutate(across(all_of(traits), ~ as.numeric(.x))) - - - #Get average accuracy and h2 for each iter accross the 5 folds - - #columns <- setdiff(colnames(results$CombinedResults), c("Iter","Fold")) - #average_accuracy_df <- results$CombinedResults %>% - # group_by(Iter) %>% - # summarize(across(all_of(columns), mean, na.rm = TRUE)) - + # Assign fixed_cov based on conditions + if (!is.null(fixed_cat) && length(fixed_cat) == length(input$pred_fixed_info2)) { + fixed_cov <- NULL + } else if (length(input$pred_fixed_info2) > 0 && is.null(fixed_cat)) { + fixed_cov <- input$pred_fixed_info2 + } else { + fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2) + } + #fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2) + cores <- 1 + total_population <- ncol(gmat) + #train_size <- floor(percentage / 100 * total_population) #Status updateProgressBar(session = session, id = "pb_gp", value = 90, title = "Generating Results") - - ##Figures and Tables - + + #initialize dataframe + results_GEBVs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) + results_TRAITs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) + colnames(results_TRAITs) <- c("Sample",paste0(traits)) # Set the column names to be the traits + colnames(results_GEBVs) <- c("Sample",paste0(traits)) # Set the column names to be the traits + + #GBLUP for each trait + for (trait_idx in 1:length(traits)) { + traitpred <- kin.blup(data = pheno2, + geno = colnames(pheno2)[1], + pheno = traits[trait_idx], + fixed = fixed_cat, + covariate = fixed_cov, + K=gmat, + n.core = cores) + + results_GEBVs[, (trait_idx+1)] <- traitpred$g + results_TRAITs[, (trait_idx+1)] <- traitpred$pred + } + #Organize dataframes + results_GEBVs[,1] <- row.names(data.frame(traitpred$g)) + results_TRAITs[,1] <- row.names(data.frame(traitpred$pred)) + + #Label the samples that already had phenotype information + results_GEBVs <- data.frame(results_GEBVs) + results_TRAITs <- data.frame(results_TRAITs) + exists_in_df <- results_GEBVs[[1]] %in% pheno2[[1]] + results_GEBVs <- cbind(results_GEBVs[1], "w/Pheno" = exists_in_df, results_GEBVs[-1]) + results_TRAITs <- cbind(results_TRAITs[1], "w/Pheno" = exists_in_df, results_TRAITs[-1]) + #Status updateProgressBar(session = session, id = "pb_gp", value = 100, title = "Finished!") + + ##Make output tables depending on 1 or 2 VCF/pedigree files used. + #GEBVs + if (!is.null(advanced_options_pred$pred_est_file)) { + # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column + pred_outputs2$all_GEBVs <- results_GEBVs[results_GEBVs$`w/Pheno` == FALSE, !names(results_GEBVs) %in% "w/Pheno"] + } else{ + pred_outputs2$all_GEBVs <- results_GEBVs + } + + #BLUPs of genetic values + if (!is.null(advanced_options_pred$pred_est_file)) { + # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column + pred_outputs2$trait_output <- results_TRAITs[results_TRAITs$`w/Pheno` == FALSE, !names(results_TRAITs) %in% "w/Pheno"] + } else{ + pred_outputs2$trait_output <- results_TRAITs + } #End the event continue_prediction2(NULL) @@ -799,6 +717,35 @@ mod_GS_server <- function(input, output, session, parent_session){ file.copy(ex, file) }) + #Download files for GP + output$download_pred_results_file <- downloadHandler( + filename = function() { + paste0("Prediction-results-", Sys.Date(), ".zip") + }, + content = function(file) { + # Temporary files list + temp_dir <- tempdir() + temp_files <- c() + + # Create a temporary file for data frames + ebv_file <- file.path(temp_dir, paste0("GS-EBV-predictions-", Sys.Date(), ".csv")) + write.csv(pred_outputs2$all_GEBVs, ebv_file, row.names = FALSE) + temp_files <- c(temp_files, ebv_file) + + trait_file <- file.path(temp_dir, paste0("GS-Phenotype-predictions-", Sys.Date(), ".csv")) + write.csv(pred_outputs2$trait_output, trait_file, row.names = FALSE) + temp_files <- c(temp_files, trait_file) + + # Zip files only if there's something to zip + if (length(temp_files) > 0) { + zip(file, files = temp_files, extras = "-j") # Using -j to junk paths + } + + # Optionally clean up + file.remove(temp_files) + } + ) + ##Summary Info pred_summary_info <- function() { # Handle possible NULL values for inputs diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R index ded934b..472e2b5 100644 --- a/R/mod_GSAcc.R +++ b/R/mod_GSAcc.R @@ -34,7 +34,7 @@ mod_GSAcc_ui <- function(id){ ), virtualSelectInput( inputId = ns("pred_fixed_info"), - label = "Select Fixed Effects (optional) (not validated):", + label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")), choices = NULL, showValueAsTags = TRUE, search = TRUE, @@ -42,10 +42,11 @@ mod_GSAcc_ui <- function(id){ ), conditionalPanel( condition = "input.pred_fixed_info.length > 0", ns = ns, + "(unselected will be considered covariates)", div( virtualSelectInput( inputId = ns("pred_fixed_cat"), - label = "Select Categorical Fixed Effects (unselected will be considered covariates)", + label = "Select Categorical Fixed Effects", choices = NULL, showValueAsTags = TRUE, search = TRUE, @@ -81,7 +82,7 @@ mod_GSAcc_ui <- function(id){ bs4Dash::tabsetPanel( tabPanel("Violin Plot", plotOutput(ns("pred_violin_plot"), height = "500px")), tabPanel("Box Plot", plotOutput(ns("pred_box_plot"), height = "500px")), - tabPanel("Accuracy Table", DTOutput(ns("pred_acc_table")), style = "overflow-y: auto; height: 500px") + tabPanel("P.A. Table", DTOutput(ns("pred_acc_table")), style = "overflow-y: auto; height: 500px") ) ) @@ -163,7 +164,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ title = "Advanced Options (beta)", selectInput( inputId = ns('pred_model'), - label = 'Model Choice', + label = 'Method Choice', choices = c("GBLUP"), selected = advanced_options$pred_model # Initialize with stored value ), @@ -172,7 +173,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ div( selectInput( inputId = ns('pred_matrix'), - label = 'GBLUP Matrix Choice', + label = 'Relationship Matrix Choice', choices = c("Gmatrix", "Amatrix", "Hmatrix"), selected = advanced_options$pred_matrix # Initialize with stored value ) diff --git a/R/mod_help.R b/R/mod_help.R index 52b7206..69d0ca0 100644 --- a/R/mod_help.R +++ b/R/mod_help.R @@ -13,7 +13,7 @@ mod_help_ui <- function(id){ fluidPage( column(width=12), column(width=12, - box(title="DArT Report2VCF", id = "DArT_Report2VCF_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE, + box(title="Convert to VCF", id = "DArT_Report2VCF_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE, "This tab converts the processed genotype and counts files from DArT into a VCF file (v4.3). This file can then be used as the genotype input for the analyses within BIGapp or used with other genomics applications.", br(), br(), bs4Dash::tabsetPanel(id = "DArT_Report2VCF_tabset", @@ -27,7 +27,7 @@ mod_help_ui <- function(id){ includeMarkdown(system.file("help_files/DArT_Report2VCF_cite.Rmd", package = "BIGapp")) )) ), - box(title="Updog Dosage Calling", id = "Updog_Dosage_Calling_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE, + box(title="Dosage Calling", id = "Updog_Dosage_Calling_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE, "This tab is designed to handle the process of dosage calling in genomic data. Dosage calling is essential for determining the number of copies of a particular allele at each genomic location.", br(), br(), bs4Dash::tabsetPanel(id = "Updog_Dosage_Calling_tabset",