Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey Endelman authored and Jeffrey Endelman committed Jun 26, 2021
1 parent 5e80148 commit ba5908a
Show file tree
Hide file tree
Showing 21 changed files with 3,358 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^doc$
^Meta$
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# History files
.Rhistory
.Rapp.history
.DS_Store

# Session Data files
.RData
Expand Down Expand Up @@ -37,3 +38,7 @@ vignettes/*.pdf

# R Environment Variables
.Renviron
.Rproj.user
*.Rproj
doc
Meta
13 changes: 13 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Package: StageWise
Title: Two-stage analysis of multi-environment trials for plant breeding
Version: 0.01
Author: Jeffrey B. Endelman
Maintainer: Jeffrey Endelman <[email protected]>
Description: Two-stage analysis of multi-environment trials for plant breeding
Depends: R (>= 4.0)
License: GPL-3
LazyData: true
RoxygenNote: 7.1.1
Encoding: UTF-8
Imports: Matrix, ggplot2, asreml
Suggests: knitr, rmarkdown
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(G_mat)
export(Stage1)
export(Stage2)
export(Stage2_prep)
export(predict_MME)
exportClasses(MME)
import(Matrix)
import(asreml)
importFrom(methods,new)
importFrom(stats,complete.cases)
importFrom(stats,model.matrix)
importFrom(tidyr,pivot_longer)
importFrom(utils,combn)
31 changes: 31 additions & 0 deletions R/G_mat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' Additive genomic relationships
#'
#' Coefficients and relationship matrix for additive effects with bi-allelic markers
#'
#' Additive effects are based on the traditional orthogonal decomposition of genetic variance in panmictic populations (Fisher 1918; Kempthorne 1957; Endelman et al. 2018). The G matrix is computed from the coefficients and scaling factor according to G = tcrossprod(coeff/scale). Missing genotype data is replaced with the population mean.
#'
#' @references Fisher (1918) Trans. Roy. Soc. Edin. 52:399-433.
#' @references Kempthorne (1957) An Introduction to Genetic Statistics.
#' @references Endelman et al. (2018) Genetics 209:77-87.
#'
#' @param geno Matrix of allele dosages (markers x indiv)
#' @param ploidy 2 or 4
#'
#' @return List containing
#' \describe{
#' \item{coeff}{Coefficients of the marker effects (dim: indiv x marker)}
#' \item{scale}{Scaling factor between markers and indiv}
#' \item{mat}{G matrix}
#' }
#'
#' @export

G_mat <- function(geno,ploidy) {
m <- nrow(geno)
n <- ncol(geno)
coeff <- scale(t(geno),scale=F)[1:n,1:m]
coeff[is.na(coeff)] <- 0
p <- apply(geno,1,mean,na.rm=T)/ploidy
scale <- sqrt(sum(ploidy*p*(1-p)))
return(list(coeff=coeff,scale=scale,mat=tcrossprod(coeff/scale)))
}
10 changes: 10 additions & 0 deletions R/MME.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#' S4 class for solving the mixed model equations
#'
#' @slot data data frame with id, env, blue, trait (optional)
#' @slot kernels list of variance-covariance matrices for the genetic effects
#' @slot Rmat residual variance-covariance matrix

#'
#' @importFrom methods new
#' @export
MME <- setClass("MME",slots=c(data="data.frame",kernels="list",Rmat="Matrix"))
194 changes: 194 additions & 0 deletions R/Stage1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
#' Stage 1 analysis of multi-environment trials
#'
#' Computes genotype BLUEs within each environment using ASReml-R
#'
#' The variable \code{data} must have one column labeled "id" for the individuals and one labeled "env" for the environments. The data for each environment are analyzed independently with a linear mixed model. Although not used in Stage1, to include a genotype x location effect in \code{\link{Stage2}}, a column labeled "loc" should be included in \code{data}.
#'
#' Including multiple traits in \code{trait} triggers a multivariate analysis, but for computational reasons, only 2 traits are analyzed at a time. With more than 2 traits, the software analyzes all pairs of traits. For single trait analysis, broad-sense H2 on a plot basis is computed from the variance components for each env, with genotype as a random effect. The residuals from this analysis are also returned as a table and plots.
#'
#' Argument \code{effects} is used to specify other i.i.d. effects besides genotype and has three columns: name, fixed, factor. The "name" column is a string that must match a column in \code{data}. The fixed column is a logical variable to indicate whether the effect is fixed (TRUE) or random (FALSE). The factor column is a logical variable to indicate whether the effect is a factor (TRUE) or numeric (FALSE).
#'
#' Missing response values are omitted for single-trait analysis but retained for multi-trait analysis (unless both traits are missing), to allow for prediction in Stage 2. By default, the workspace and pworkspace limits for ASReml-R are set at 500mb. If you get an error about insufficient memory, try increasing the appropriate value (workspace for variance estimation and pworkspace for BLUE computation).
#'
#' @references Damesa et al. 2017. Agronomy Journal 109: 845-857. doi:10.2134/agronj2016.07.0395
#'
#' @param data input data frame (see Details)
#' @param traits trait names (see Details)
#' @param effects data frame specifying other effects in the model (see Details)
#' @param workspace memory limit for ASRreml-R variance estimation
#' @param pworkspace memory limit for ASRreml-R BLUE computation
#' @param silent TRUE/FALSE, whether to suppress ASReml-R output
#'
#' @return List containing
#' \describe{
#' \item{blue}{data frame of BLUEs for all environments}
#' \item{vcov}{list of variance-covariance matrices for the BLUEs, one per env}
#' \item{H2}{broad-sense H2 on a plot basis (only for single trait)}
#' \item{resid}{list containing boxplot, qqplot, and table of residuals (only for single trait)}
#' }
#'
#' @importFrom stats complete.cases
#' @importFrom utils combn
#' @import asreml
#' @export

Stage1 <- function(data,traits,effects=NULL,silent=TRUE,workspace="500mb",pworkspace="500mb") {

stopifnot(requireNamespace("asreml"))
stopifnot(traits %in% colnames(data))
n.trait <- length(traits)
if (n.trait > 2) {
trait2 <- combn(traits,2)
ans <- apply(trait2,2,function(traits){Stage1(data,traits,effects,silent,workspace,pworkspace)})
return(ans)
} else {
if (!is.null(effects)) {
stopifnot(effects$names %in% colnames(data))
}
stopifnot(c("id","env") %in% colnames(data))

data$env <- as.character(data$env)
data$id <- as.character(data$id)
if (n.trait > 1) {
iz <- apply(data[,traits],1,function(z){!all(is.na(z))})
data <- data[iz,]
}
envs <- unique(data$env)
n.env <- length(envs)

#prepare asreml command
asreml.options(maxit=30,workspace=workspace,pworkspace=pworkspace,trace=!silent)
effect.table <- matrix("",nrow=2,ncol=2)
rownames(effect.table) <- c("blue","blup")
colnames(effect.table) <- c("fixed","random")
if (n.trait > 1) {
model <- sub("response",paste(traits,collapse=","),
"asreml(data=data1,na.action=na.method(y='include',x='omit'),fixed=cbind(response)~FIX,random=~RANDOM,residual=~id(units):corh(trait))",fixed=T)
effect.table[1,1] <- "id:trait"
effect.table[2,] <- NA
} else {
model <- sub("response",traits,
"asreml(data=data1,na.action=na.method(y='omit',x='omit'),fixed=response~FIX,random=~RANDOM,residual=~idv(units))",fixed=T)
effect.table[1,1] <- "id"
effect.table[2,2] <- "id"
}

factor.vars <- "id"
if (!is.null(effects)) {
for (i in 1:nrow(effects)) {
if (effects$fixed[i]) {
effect.table[,1] <- paste(effect.table[,1],effects$name[i],sep="+")
} else {
effect.table[,2] <- paste(effect.table[,2],effects$name[i],sep="+")
}
}
factor.vars <- c(factor.vars,effects$name[which(effects$factor)])
numeric.vars <- effects$name[which(!effects$factor)]
} else {
numeric.vars <- character(0)
}
n.numeric <- length(numeric.vars)

if (effect.table[1,2]=="") {
blue.model <- sub("random=~RANDOM,","",model,fixed=T)
} else {
blue.model <- sub("RANDOM",effect.table[1,2],model,fixed=T)
}
blue.model <- sub("FIX",effect.table[1,1],blue.model,fixed=T)

tmp <- effect.table[2,1]
if (tmp=="") {
blup.model <- sub("FIX","1",model,fixed=T)
} else {
if (substr(tmp,1,1)=="+") {
tmp <- substr(tmp,2,nchar(tmp))
}
blup.model <- sub("FIX",tmp,model,fixed=T)
}
blup.model <- sub("RANDOM",effect.table[2,2],blup.model,fixed=T)

resid.blup <- NULL
vcov <- vector("list",n.env)
names(vcov) <- envs
if (n.trait==1) {
H2 <- data.frame(env=envs,H2=as.numeric(NA))
}

blue.out <- NULL
blup.resid <- NULL
cat(sub("X",paste(traits,collapse=" "),"Traits: X\n"))
for (j in 1:n.env) {
cat(sub("X",envs[j],"Env: X\n"))
ix <- which(data$env==envs[j])
if (length(ix)==0) {
cat("Warning: No data present\n")
} else {
data1 <- data[ix,]
for (q in 1:length(factor.vars)) {
eval(parse(text="data1[,factor.vars[q]] <- factor(as.character(data1[,factor.vars[q]]))"))
}
if (n.numeric > 0) {
for (q in 1:n.numeric) {
eval(parse(text="data1[,numeric.vars[q]] <- as.numeric(data1[,numeric.vars[q]])"))
}
}
}

ans <- eval(parse(text=blue.model))
while (!ans$converge) {
cat("Fixed effects model failed to converge. Do you wish to continue running? y/n \n")
input <- readLines(n=1)
if (input=="y") {
ans <- update.asreml(ans)
} else {
return()
}
}
if (n.trait > 1) {
predans <- predict.asreml(ans,classify="id:trait",vcov = TRUE)
tmp <- predans$pvals[,c("id","trait","predicted.value")]
colnames(tmp) <- c("id","trait","BLUE")
} else {
predans <- predict.asreml(ans,classify="id",vcov = TRUE)
tmp <- predans$pvals[,c("id","predicted.value")]
colnames(tmp) <- c("id","BLUE")
}
tmp$id <- as.character(tmp$id)

blue.out <- rbind(blue.out,data.frame(env=envs[j],tmp))
vcov[[j]] <- predans$vcov
rownames(vcov[[j]]) <- colnames(vcov[[j]]) <- paste(tmp$id,tmp$env,sep=":")

if (n.trait==1) {
ans <- eval(parse(text=blup.model))
while (!ans$converge) {
cat("Random effects model failed to converge. Do you wish to continue running? y/n \n")
input <- readLines(n=1)
if (input=="y") {
ans <- update.asreml(ans)
} else {
return()
}
}
vc <- summary(ans)$varcomp
Vg <- vc[match("id",rownames(vc)),1]
Ve <- vc[match("units!units",rownames(vc)),1]
H2$H2[j] <- round(Vg/(Vg+Ve),2)
blup.resid <- rbind(blup.resid,data.frame(id=as.character(data1$id),env=envs[j],
resid=resid(ans)))
}
}
if ("loc" %in% colnames(data)) {
blue.out$loc <- as.character(data$loc[match(data$env,data$loc)])
}
if (n.trait==1) {
p1 <- ggplot(data=blup.resid,aes(y=resid,x=env)) + ylab("Residual") + xlab("") +
stat_boxplot(outlier.color="red") + theme_bw() + theme(axis.text.x=element_text(angle=90,vjust=0.5))
p2 <- ggplot(data=blup.resid,aes(sample=resid)) + stat_qq() + stat_qq_line() + facet_wrap(~env) + theme_bw() + xlab("Expected") + ylab("Observed")

return(list(blue=blue.out,vcov=vcov,resid=list(boxplot=p1,qqplot=p2,table=blup.resid),H2=H2))
} else {
return(list(blue=blue.out,vcov=vcov))
}
}
}
Loading

0 comments on commit ba5908a

Please sign in to comment.