diff --git a/DESCRIPTION b/DESCRIPTION index 6e38d0a..d320d1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: polyBreedR Title: Genomics-assisted breeding for polyploids (and diploids) -Version: 0.37 +Version: 0.38 Author: Jeffrey B. Endelman Maintainer: Jeffrey Endelman <endelman@wisc.edu> Description: Genomics-assisted breeding for polyploids (and diploids) diff --git a/NEWS b/NEWS index 33afea0..2e4252b 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +Changes in 0.38 +* Added chunk.size option to gbs + Changes in 0.37 * Vignette 3 diff --git a/R/gbs.R b/R/gbs.R index 3eba640..daf10dd 100644 --- a/R/gbs.R +++ b/R/gbs.R @@ -6,11 +6,14 @@ #' #' Posterior mode and mean genotypes are added as GT and DS fields. GQ is also added based on probability of posterior mode. Binomial calculation uses R/updog package (Gerard et al. 2018) with "norm" prior. Previous INFO is discarded; adds NS, DP.AVG, AF.GT, AB, OD, SE. #' +#' The input file is processed in chunks of size \code{chunk.size}. +#' #' @param in.file VCF input file #' @param out.file VCF output file #' @param ploidy ploidy #' @param bias TRUE/FALSE, whether to estimate allelic bias #' @param n.core number of cores +#' @param chunk.size number of variants to process at a time #' @param silent TRUE/FALSE #' #' @return nothing @@ -21,7 +24,10 @@ #' @importFrom stats anova lm chisq.test gbs <- function(in.file, out.file, ploidy, bias=TRUE, n.core=1, - silent=FALSE) { + chunk.size=1000, silent=FALSE) { + + chunk.size <- as.integer(chunk.size) + stopifnot(chunk.size > 0) prior <- "norm" if (bias) { @@ -106,14 +112,13 @@ gbs <- function(in.file, out.file, ploidy, bias=TRUE, n.core=1, return(paste(c(info,"GT:AD:DP:DS:GQ",z),collapse="\t")) } - block.size <- 100 - nb <- prep$n.mark %/% block.size+1 + nb <- prep$n.mark %/% chunk.size+1 i=1 for (i in 1:nb) { - tmp <- readLines(con.in,block.size) + tmp <- readLines(con.in,chunk.size) m <- length(tmp) if (!silent) - cat(sub("X",(i-1)*block.size + m,"Progress: X markers\n")) + cat(sub("X",(i-1)*chunk.size + m,"Progress: X markers\n")) tmp2 <- strsplit(tmp,split="\t",fixed=T) x <- lapply(tmp2,function(x){vcf_extract(x[-(1:8)],"AD")}) if (n.core > 1) { diff --git a/docs/polyBreedR_Manual.pdf b/docs/polyBreedR_Manual.pdf index 9cd0dc0..2d2ea01 100644 Binary files a/docs/polyBreedR_Manual.pdf and b/docs/polyBreedR_Manual.pdf differ diff --git a/man/gbs.Rd b/man/gbs.Rd index a21c75a..15926de 100644 --- a/man/gbs.Rd +++ b/man/gbs.Rd @@ -4,7 +4,15 @@ \alias{gbs} \title{Genotype calls for GBS} \usage{ -gbs(in.file, out.file, ploidy, bias = TRUE, n.core = 1, silent = FALSE) +gbs( + in.file, + out.file, + ploidy, + bias = TRUE, + n.core = 1, + chunk.size = 1000, + silent = FALSE +) } \arguments{ \item{in.file}{VCF input file} @@ -17,6 +25,8 @@ gbs(in.file, out.file, ploidy, bias = TRUE, n.core = 1, silent = FALSE) \item{n.core}{number of cores} +\item{chunk.size}{number of variants to process at a time} + \item{silent}{TRUE/FALSE} } \value{ @@ -29,4 +39,6 @@ Genotype calls for genotype-by-sequencing (GBS) data VCF input file must contain AD field. Variants with more than 2 alleles are coerced to zero DP, so better to filter them out first. Posterior mode and mean genotypes are added as GT and DS fields. GQ is also added based on probability of posterior mode. Binomial calculation uses R/updog package (Gerard et al. 2018) with "norm" prior. Previous INFO is discarded; adds NS, DP.AVG, AF.GT, AB, OD, SE. + +The input file is processed in chunks of size \code{chunk.size}. }