Skip to content

Commit

Permalink
v1.10
Browse files Browse the repository at this point in the history
  • Loading branch information
jendelman committed Jul 18, 2024
1 parent bc26ef7 commit 3c1e618
Show file tree
Hide file tree
Showing 18 changed files with 259 additions and 137 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: StageWise
Title: Two-stage analysis of multi-environment trials for genomic selection and GWAS
Version: 1.09
Version: 1.10
Author: Jeffrey B. Endelman
Maintainer: Jeffrey Endelman <[email protected]>
Description: Fully efficient, two stage analysis of multi-environment trials, including directional dominance and multi-trait genomic selection
Depends: R (>= 4.0)
License: GPL-3
RoxygenNote: 7.2.3
Encoding: UTF-8
Imports: Matrix (>= 1.5-0), ggplot2, methods, ggrepel, rlang, ggpubr, SpATS, spam, AGHmatrix, MASS, CVXR, ggforce
Imports: Matrix (>= 1.5-0), ggplot2, methods, ggrepel, rlang, ggpubr, SpATS, spam, AGHmatrix, MASS, CVXR, ggforce, data.table
Suggests: knitr, rmarkdown, asreml
Collate:
'Stage1.R'
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Changes in 1.10
* Added support for multi-population/multi-ploidy analysis
* Fixed error in Stage1 related to multiple experiments per env

Changes in 1.09
* changed read.csv to fread in read_geno

Expand Down
105 changes: 57 additions & 48 deletions R/Stage1.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' @return List containing
#' \describe{
#' \item{blues}{data frame of BLUEs}
#' \item{vcov}{list of variance-covariance matrices for the BLUEs, one per experiment (env)}
#' \item{vcov}{list of variance-covariance matrices for the BLUEs, one per env}
#' \item{fit}{data frame with broad-sense H2 (plot basis) and/or AIC}
#' \item{resid}{For single trait, list of diagnostic plots and data frame of residuals. For multi-trait, list of resid var-cov matrices.}
#' }
Expand Down Expand Up @@ -312,55 +312,64 @@ Stage1 <- function(filename,traits,effects=NULL,solver="asreml",
blue.out$env <- data$env[match(blue.out$expt,data$expt)]

nee <- sapply(expt.in.env,length)
iu <- which(nee==1)
if (length(iu) > 0) {
tmp <- names(vcov)[iu]
names(vcov)[iu] <- blue.out$env[match(tmp,blue.out$expt)]
}
#iu <- which(nee==1)
#if (length(iu) > 0) {
# tmp <- names(vcov)[iu]
# names(vcov)[iu] <- blue.out$env[match(tmp,blue.out$expt)]
#}

for (j in which(nee > 1)) {
omega.list <- vcov[expt.in.env[[j]]]
vcov2 <- mapply(FUN=function(x,y){
tmp <- paste(y,rownames(x),sep=":")
dimnames(x) <- list(tmp,tmp)
return(x)},x=omega.list,y=as.list(expt.in.env[[j]]))
vcov.env <- vector("list",length(nee))
names(vcov.env) <- names(nee)
iu <- which(nee==1)
if (length(iu) > 0)
vcov.env[names(iu)] <- vcov[unlist(expt.in.env[iu])]

.GlobalEnv$asremlOmega <- direct_sum(lapply(vcov2,solve))
dname <- lapply(vcov2,rownames)
dimnames(.GlobalEnv$asremlOmega) <- list(unlist(dname),unlist(dname))
attr(.GlobalEnv$asremlOmega,"INVERSE") <- TRUE
iu <- which(nee > 1)
if (length(iu) > 0)
for (j in iu) {
omega.list <- vcov[expt.in.env[[j]]]
vcov2 <- mapply(FUN=function(x,y){
tmp <- paste(y,rownames(x),sep=":")
dimnames(x) <- list(tmp,tmp)
return(x)},x=omega.list,y=as.list(expt.in.env[[j]]))

ix <- which(blue.out$env==envs[j])
data2 <- blue.out[ix,]
data2$expt.id <- factor(paste(data2$expt,data2$id,sep=":"))
data2$id <- factor(data2$id)
data2$expt <- factor(data2$expt)
start.table <- asreml::asreml(data=data2,fixed=BLUE~expt-1+id,
random=~vm(expt.id,source=asremlOmega),
residual=~idv(units),start.values=TRUE)$vparameters.table
k <- grep("Omega",start.table$Component,fixed=T)
start.table$Value[k] <- 1
start.table$Constraint[k] <- "F"
ans3 <- asreml::asreml(data=data2,fixed=BLUE~expt-1+id,
random=~vm(expt.id,source=asremlOmega),
residual=~idv(units),G.param=start.table)
.GlobalEnv$asremlOmega <- direct_sum(lapply(vcov2,solve))
dname <- lapply(vcov2,rownames)
dimnames(.GlobalEnv$asremlOmega) <- list(unlist(dname),unlist(dname))
attr(.GlobalEnv$asremlOmega,"INVERSE") <- TRUE

predans3 <- asreml::predict.asreml(ans3,classify="id",vcov = TRUE)
blue3 <- data.frame(expt=NA,predans3$pvals[,c("id","predicted.value")],env=envs[j])
colnames(blue3) <- c("expt","id","BLUE","env")
vcov3 <- predans3$vcov
dimnames(vcov3) <- list(blue3$id,blue3$id)
vcov2 <- vcov[-match(expt.in.env[[j]],names(vcov))]
vcov <- c(vcov2,vcov3)
names(vcov) <- c(names(vcov2),envs[j])
blue.out <- rbind(blue.out[-ix,],blue3)
rm("asremlOmega",envir = .GlobalEnv)
}
ix <- which(blue.out$env==envs[j])
data2 <- blue.out[ix,]
data2$expt.id <- factor(paste(data2$expt,data2$id,sep=":"))
data2$id <- factor(data2$id)
data2$expt <- factor(data2$expt)
start.table <- asreml::asreml(data=data2,fixed=BLUE~expt-1+id,
random=~vm(expt.id,source=asremlOmega),
residual=~idv(units),start.values=TRUE)$vparameters.table
k <- grep("Omega",start.table$Component,fixed=T)
start.table$Value[k] <- 1
start.table$Constraint[k] <- "F"
ans3 <- asreml::asreml(data=data2,fixed=BLUE~expt-1+id,
random=~vm(expt.id,source=asremlOmega),
residual=~idv(units),G.param=start.table)

predans3 <- asreml::predict.asreml(ans3,classify="id",vcov = TRUE)
blue3 <- data.frame(expt=NA,predans3$pvals[,c("id","predicted.value")],env=envs[j])
colnames(blue3) <- c("expt","id","BLUE","env")
vcov3 <- predans3$vcov
dimnames(vcov3) <- list(blue3$id,blue3$id)
#vcov2 <- vcov[-match(expt.in.env[[j]],names(vcov))]
#vcov <- c(vcov2,vcov3)
#names(vcov) <- c(names(vcov2),envs[j])
vcov.env[[names(iu)[j]]] <- vcov3
blue.out <- rbind(blue.out[-ix,],blue3)
rm("asremlOmega",envir = .GlobalEnv)
}
}

if (n.trait > 1) {
vcov <- vector("list",n.env)
names(vcov) <- envs
vcov.env <- vector("list",n.env)
names(vcov.env) <- envs

expt.in.env <- lapply(split(data$expt,factor(data$env,levels=envs)),unique)
nexpt.per.env <- sapply(expt.in.env,length)
Expand Down Expand Up @@ -404,10 +413,10 @@ Stage1 <- function(filename,traits,effects=NULL,solver="asreml",
predans <- asreml::predict.asreml(ans,classify="id:trait",vcov = TRUE)
tmp <- predans$pvals[,c("id","trait","predicted.value")]
colnames(tmp) <- c("id","trait","BLUE")
vcov[[j]] <- predans$vcov
vcov.env[[j]] <- predans$vcov
tmp$id <- as.character(tmp$id)
id.trait <- apply(tmp[,1:2],1,paste,collapse=":")
dimnames(vcov[[j]]) <- list(id.trait,id.trait)
dimnames(vcov.env[[j]]) <- list(id.trait,id.trait)
blue.out <- rbind(blue.out,data.frame(env=envs[j],tmp))
}
}
Expand All @@ -428,13 +437,13 @@ Stage1 <- function(filename,traits,effects=NULL,solver="asreml",
stat_boxplot(outlier.color="red") + theme_bw() + theme(axis.text.x=element_text(angle=90,vjust=0.5))
p2 <- ggplot(data=blup.resid,aes(sample=.data$resid)) + stat_qq() + stat_qq_line() + facet_wrap(~expt) + theme_bw() + xlab("Expected") + ylab("Observed")
if (solver=="ASREML")
return(list(blues=blue.out,vcov=vcov,fit=fit,
return(list(blues=blue.out,vcov=vcov.env,fit=fit,
resid=list(boxplot=p1,qqplot=p2,table=blup.resid)))
if (solver=="SPATS")
return(list(blues=blue.out,vcov=vcov,fit=fit,
return(list(blues=blue.out,vcov=vcov.env,fit=fit,
resid=list(boxplot=p1,qqplot=p2,spatial=spatial.plot,table=blup.resid)))
} else {
#Multi-trait
return(list(blues=blue.out,vcov=vcov,fit=fit,resid=resid.vc))
return(list(blues=blue.out,vcov=vcov.env,fit=fit,resid=resid.vc))
}
}
18 changes: 11 additions & 7 deletions R/Stage2.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
diag(resid.vc) <- diag(resid.vc)/(n.trait-1)
resid.vc <- fu(resid.vc)
diag(B) <- diag(B)/(n.trait-1)
B <- fu(B)
if (max(abs(diag(B))) > .Machine$double.eps*10)
B <- fu(B)

if (non.add!="none") {
diag(geno2) <- diag(geno2)/(n.trait-1)
Expand Down Expand Up @@ -138,6 +139,9 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
if (!is.null(geno)) {
stopifnot(is(geno,"class_geno"))
id <- sort(intersect(data$id,rownames(geno@G)))
if (length(geno@ploidy) > 1)
geno@ploidy <- geno@ploidy[id]

n <- length(id)
data <- data[data$id %in% id,]
id.weights <- table(factor(data$id,levels=id))
Expand All @@ -150,7 +154,7 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
meanD <- as.numeric(pvar(V=.GlobalEnv$asremlD,weights=id.weights))

dom.covariate <- as.numeric(geno@coeff.D[id,] %*% matrix(1,nrow=ncol(geno@coeff.D),ncol=1))
dom.covariate <- dom.covariate/(geno@scale*(geno@ploidy-1))
dom.covariate <- dom.covariate/(geno@scale[id]*(geno@ploidy-1))
names(dom.covariate) <- id
data$dom <- dom.covariate[data$id]

Expand Down Expand Up @@ -184,7 +188,7 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
data$env.id <- factor(data$env.id,levels=env.id)

out <- vector("list",4)
names(out) <- c("aic","vars","fixed","random")
names(out) <- c("aic","vars","params","random")
n.loc <- 1
n.trait <- 1

Expand Down Expand Up @@ -243,8 +247,9 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
model <- sub("FIX",paste(c("env",covariates,fix.eff.marker,dom),collapse="+"),model,fixed=T)
} else {
model <- "asreml(data=data,fixed=BLUE~FIX-1,random=~RANDOM,residual=~dsum(~idv(units)|loc)"
tmp <- gsub("+",":loc+",paste(c("env",fix.eff.marker,dom),collapse="+"),fixed=T)
model <- sub("FIX",paste(tmp,"loc",sep=":"),model,fixed=T)
tmp <- gsub("+",":loc+",paste(c(fix.eff.marker,dom,"env"),collapse="+"),fixed=T)
#model <- sub("FIX",paste(tmp,"loc",sep=":"),model,fixed=T)
model <- sub("FIX",tmp,model,fixed=T)
}
} else {
model <- "asreml(data=data,fixed=BLUE~FIX-1,random=~RANDOM,residual=~id(env.id):us(Trait)"
Expand Down Expand Up @@ -477,12 +482,11 @@ Stage2 <- function(data,vcov=NULL,geno=NULL,fix.eff.marker=NULL,
}

if (non.add=="dom") {
gamma <- (geno@ploidy/2 - 1)/(geno@ploidy - 1)
ix <- grep("dom",rownames(beta),fixed=T)
out$params$heterosis <- data.frame(trait=traits,
estimate=as.numeric(beta[ix,1]),
SE=as.numeric(beta[ix,2]))

gamma <- mean((geno@ploidy/2 - 1)/(geno@ploidy - 1))
weights <- id.weights/sum(id.weights)
for (i in 1:n.trait)
for (j in i:n.trait) {
Expand Down
12 changes: 8 additions & 4 deletions R/blup.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,18 @@ blup <- function(data, geno=NULL, what, index.coeff=NULL, gwas.ncore=0L) {
if (substr(what,1,2)=="DM" & (!(is(geno,"class_genoD")) | data[[1]]@model < 3L))
stop("Dominance model is required in geno and data")

n.id <- length(data[[1]]@id)
gamma <- 0
if (!is.null(geno)) {
if (substr(what,1,2)=="GV")
gamma <- 1
if (substr(what,1,2)=="BV" & data[[1]]@model==3L)
gamma <- (geno@ploidy/2 - 1)/(geno@ploidy - 1)

if (length(gamma) > 1) {
tmp <- tapply(gamma,attr(geno@ploidy,"pop"),mean)
gamma <- sum(tmp[names(index.coeff)]*index.coeff)/sum(index.coeff)
}
}

if (data[[1]]@model < 2L) {
Expand All @@ -60,8 +66,6 @@ blup <- function(data, geno=NULL, what, index.coeff=NULL, gwas.ncore=0L) {
index.scale <- sqrt(unlist(sapply(data,function(z){diag(as.matrix(z@geno1.var))})) +
gamma^2*unlist(sapply(data,function(z){diag(as.matrix(z@geno2.var))})))
}

n.id <- length(data[[1]]@id)

if (!multi.prep) {
data <- data2
Expand Down Expand Up @@ -176,12 +180,12 @@ blup <- function(data, geno=NULL, what, index.coeff=NULL, gwas.ncore=0L) {

if (what=="AM") {
G <- kron(geno@eigen.G,1)
M <- kronecker(crossprod(geno@coeff,crossprod(G$inv)),matrix(index.coeff,nrow=1))/geno@scale
M <- kronecker(crossprod((geno@coeff/geno@scale),crossprod(G$inv)),matrix(index.coeff,nrow=1))
effect <- as.numeric(M %*% matrix(data@random[1:n.random],ncol=1))
V <- data@var.uhat[1:n.random,1:n.random] #used for GWAS
} else {
D <- kron(geno@eigen.D,1)
M <- kronecker(crossprod(geno@coeff.D,crossprod(D$inv)),matrix(index.coeff,nrow=1))/geno@scale.D
M <- kronecker(crossprod((geno@coeff.D/geno@scale.D),crossprod(D$inv)),matrix(index.coeff,nrow=1))
dhat <- -kronecker(matrix(geno@Fg[data@id],ncol=1),matrix(data@heterosis,ncol=1)) +
matrix(data@random[n.random + 1:n.random],ncol=1)
effect <- as.numeric(M %*% dhat)
Expand Down
23 changes: 12 additions & 11 deletions R/blup_prep.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
#'
#' The \code{method} argument can be used to control how the linear system is solved. "MME" leads to inversion of the MME coefficient matrix, while "Vinv" leads to inversion of the overall var-cov matrix for the response vector. If NULL, the software uses whichever method involves inverting the smaller matrix. If the number of random effects (m) is less than the number of BLUEs (n), "MME" is used.
#'
#' For the multi-location model, if all of the environments for a location are masked, the average of the other locations is used when computed average fixed effects.
#' For the multi-location model, if all of the environments for a location are masked, the average of the other locations is used when computing average fixed effects.
#'
#' @param data data frame of BLUEs from Stage 1
#' @param vcov list of variance-covariance matrices for the BLUEs
#' @param geno object of \code{\link{class_geno}} from \code{\link{read_geno}}
#' @param vars object of \code{\link{class_var}} from \code{\link{Stage2}}
#' @param mask (optional) data frame with possible columns "id","env","trait"
#' @param mask (optional) data frame with possible columns "id","env","loc","trait"
#' @param method (optional) "MME", "Vinv", NULL (defaut). see Details
#'
#' @return Object of \code{\link{class_prep}}
Expand Down Expand Up @@ -50,7 +50,7 @@ blup_prep <- function(data,vcov=NULL,geno=NULL,vars,mask=NULL,method=NULL) {
data <- data[!is.na(data$BLUE),]

if (!is.null(mask)) {
tmp <- intersect(colnames(mask),c("id","env","trait"))
tmp <- intersect(colnames(mask),c("id","env","trait","loc"))
stopifnot(length(tmp)>0)
if (length(tmp) > 1) {
tmp2 <- apply(mask[,tmp],1,paste,collapse=":")
Expand Down Expand Up @@ -189,10 +189,10 @@ blup_prep <- function(data,vcov=NULL,geno=NULL,vars,mask=NULL,method=NULL) {
Gmat2 <- kron(eigen.A=geno@eigen.D, B=vars@geno2)

#add to X matrix
dom.covariate <- Z %*% kronecker(geno@coeff.D %*% matrix(1,nrow=ncol(geno@coeff.D),ncol=1),
diag(n.loc))
dom.covariate <- Z %*% kronecker((geno@coeff.D/(geno@scale*(ploidy-1))) %*%
matrix(1,nrow=ncol(geno@coeff.D),ncol=1),diag(n.loc))
colnames(dom.covariate) <- paste("heterosis",locations,sep=":")
X <- cbind(X,dom.covariate/(geno@scale*(ploidy-1)))
X <- cbind(X,dom.covariate)
} else {
Gmat2 <- kron(eigen.A=eigen.I, B=vars@geno2)
}
Expand All @@ -213,8 +213,9 @@ blup_prep <- function(data,vcov=NULL,geno=NULL,vars,mask=NULL,method=NULL) {
Gmat1 <- kron(eigen.A = geno@eigen.G, B=vars@geno1)
if (vars@model==3L) {
Gmat2 <- kron(eigen.A=geno@eigen.D, B=vars@geno2)
dom.covariate <- as.numeric(Z %*% geno@coeff.D %*% matrix(1,nrow=ncol(geno@coeff.D),ncol=1))
X <- cbind(X,heterosis=dom.covariate/(geno@scale*(ploidy-1)))
dom.covariate <- as.numeric(Z %*% (geno@coeff.D/(geno@scale*(ploidy-1))) %*%
matrix(1,nrow=ncol(geno@coeff.D),ncol=1))
X <- cbind(X,heterosis=dom.covariate)
} else {
Gmat2 <- kron(eigen.A=eigen.I, B=vars@geno2)
}
Expand Down Expand Up @@ -254,10 +255,10 @@ blup_prep <- function(data,vcov=NULL,geno=NULL,vars,mask=NULL,method=NULL) {
Gmat2 <- kron(eigen.A=geno@eigen.D, B=vars@geno2)

#add to X matrix
dom.covariate <- Z %*% kronecker(geno@coeff.D %*% matrix(1,nrow=ncol(geno@coeff.D),ncol=1),
diag(n.trait))
dom.covariate <- Z %*% kronecker((geno@coeff.D/(geno@scale*(ploidy-1))) %*%
matrix(1,nrow=ncol(geno@coeff.D),ncol=1),diag(n.trait))
colnames(dom.covariate) <- paste("heterosis",traits,sep=":")
X <- cbind(X,dom.covariate/(geno@scale*(ploidy-1)))
X <- cbind(X,dom.covariate)
} else {
Gmat2 <- kron(eigen.A=eigen.I, B=vars@geno2)
}
Expand Down
4 changes: 2 additions & 2 deletions R/class_geno.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' S4 class for marker genotype data
#'
#' @slot ploidy ploidy
#' @slot ploidy If mixed ploidy, then a vector equal to pop size; otherwise a single integer
#' @slot map Marker map positions
#' @slot coeff Coefficients of the marker effects (dim: indiv x marker)
#' @slot scale Scaling factor between markers and indiv
#' @slot scale Scaling factor between markers and indiv, vector of length equal to pop size
#' @slot G Additive relationship matrix (from markers and potentially also pedigree)
#' @slot eigen.G list of eigenvalues and eigenvectors
#'
Expand Down
Loading

0 comments on commit 3c1e618

Please sign in to comment.