From 2c5e8e79921010269ad7dd35bff96faa21bb7121 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Wed, 17 Aug 2022 13:01:42 -0700
Subject: [PATCH 1/7] Updates to `dice()` and `slab()`  - `slab()`: replace
 slice() with dice()  - `dice()`: address some TODOs

---
 R/dice.R          |  91 +++++++++--------
 R/genSlabLabels.R |  56 +++++------
 R/slab.R          | 250 +++++++++++++++++++---------------------------
 3 files changed, 180 insertions(+), 217 deletions(-)

diff --git a/R/dice.R b/R/dice.R
index ed7dfffeb..390768623 100644
--- a/R/dice.R
+++ b/R/dice.R
@@ -36,19 +36,26 @@
 #' 
 #' @export
 #' 
-dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, strict = TRUE, byhz = FALSE) {
-  
+dice <-  function(x,
+                  fm = NULL,
+                  SPC = TRUE,
+                  pctMissing = FALSE,
+                  fill = FALSE,
+                  strict = TRUE,
+                  byhz = FALSE,
+                  verbose = FALSE) {
+    
   # sacrifice to R CMD check spirits
   .pctMissing <- NULL
   
   # find / flag / remove invalid profiles or horizons
   # this will generate an error if there are no valid profiles remaining
-  if(strict) {
-    x <- HzDepthLogicSubset(x, byhz = byhz)
+  if  (strict) {
+    x <- HzDepthLogicSubset(x, byhz = TRUE)
     
     ## TODO: this could invoke 2x calls to fillHzGaps
     # removed horizons will trigger an automatic gap-filling
-    if(!is.null(metadata(x)$removed.horizons)) {
+    if (!is.null(metadata(x)$removed.horizons)) {
       message('filling gaps left by HzDepthLogicSubset')
       x <- fillHzGaps(x, flag = TRUE, to_top = NULL, to_bottom = NULL)
     }
@@ -67,14 +74,13 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   ids.top.bottom.idx <- match(c(idn, hzidn, htb), hznames)
   
   # interpret optional formula
-  if(!is.null(fm)) {
+  if (!is.null(fm)) {
     # something supplied
     # reasonable?
-    if(! inherits(fm, "formula")) {
+    if (!inherits(fm, "formula")) {
       stop('must provide a valid formula', call. = FALSE)
     }
     
-    
     # extract components of the formula:
     fm <- str_c(deparse(fm, 500), collapse = "")
     elements <- str_split(fm, fixed("~"))[[1]]
@@ -92,17 +98,17 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
     vars <- fm[[2]] 
     
     # check for bogus left/right side problems with the formula
-    if(any(z < 0) | any(is.na(z))) {
+    if (any(z < 0) | any(is.na(z))) {
       stop('z-slice must be >= 0', call. = FALSE)
     }
       
     # check for integer / numeric slices
-    if(! inherits(z,  c('numeric','integer'))) {
+    if (!inherits(z,  c('numeric','integer'))) {
       stop('z-slice must be either numeric or integer', call. = FALSE)
     }
       
     # if z-index is missing set to NULL for later on
-    if(length(z) == 0) {
+    if (length(z) == 0) {
       z <- NULL
     } else {
       # z index is specified
@@ -111,13 +117,13 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
     }
     
     # check for '.' --> all variables, minus ID/depths
-    if(any(vars == '.')) {
+    if (any(vars == '.')) {
       # all variables except profile ID, horizon ID, top, bottom
       vars <- hznames[-ids.top.bottom.idx]
     }
 
     # check for column names that don't exist
-    if(! any(vars %in% hznames)) {
+    if (!any(vars %in% hznames)) {
       stop('names in formula do not match any horizon attributes', call. = FALSE)
     }
       
@@ -127,7 +133,7 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
     # slice to-depth of collection
     
     # optionally fill all gaps between min(x) --- [gaps] --- max(x)
-    if(fill) {
+    if (fill) {
       x <- fillHzGaps(x, flag = TRUE, to_top = min(x), to_bottom = max(x))
     }
     
@@ -147,11 +153,12 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   )
   
   # convert to DT
-  if(! inherits(h, 'data.table')) {
+  if (!inherits(h, 'data.table')) {
     h <- as.data.table(h)
   }
   
   ## TODO: are keys worth the extra sorting / re-sorting?
+  ##       no, probably not inherently worth it -AGB 2022/08/17
   # init keys, sorts the data on hzID (alpha-sort)
   # setkeyv(h, hzidn)
   # consider an index, seems to have no effect
@@ -163,21 +170,31 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   
   # safe wrapper to base::seq
   # NA in from/to (hz depths) not allowed
-  # TODO: from == to (0-thickness) horizons ??
   .seq.safe <- function(from, to, by) {
-    if(any(is.na(from)) | any(is.na(to))) {
+    if (any(is.na(from)) | any(is.na(to))) {
       stop('corrupt horizonation, consider using `strict = TRUE`', call. = FALSE)
     }
-    
+    if (from > to) {
+      toold <- to
+      to <- from
+      from <- toold
+    }
     return(
-      seq(from = from, to = to, by = by)
+      seq(from = from, to = to, by = abs(by))
     )
   }
   
+  # from == to (0-thickness) horizons -- these should be removed -AGB 2022/08/17
+  zthk <- h[[htb[1]]] == h[[htb[2]]]
+  if (any(zthk, na.rm = TRUE)) {
+    message("horizons with zero thickness have been omitted from results")
+  }
+  h.sub <- h[which(!zthk),]
+  
   tops <- mapply(
     FUN = .seq.safe, 
-    from = h[[htb[1]]], 
-    to = h[[htb[2]]] - 1, 
+    from = h.sub[[htb[1]]], 
+    to = h.sub[[htb[2]]] - 1, 
     by = 1, 
     SIMPLIFY = FALSE
   )
@@ -188,30 +205,21 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   # expand slice IDs (horizon IDs)
   # these are used to join with horizons
   sliceIDs <- rep(
-    h[[hzidn]], 
-    times = h[[htb[2]]] - h[[htb[1]]]
+    h.sub[[hzidn]], 
+    times = h.sub[[htb[2]]] - h.sub[[htb[1]]]
   )
   
-  
   # assemble slice LUT for JOIN
   s <- data.table(
     sliceID = sliceIDs, 
-    .sliceTop = tops, 
-    .sliceBottom = bottoms
+    .sliceTop = tops[1:length(sliceIDs)], 
+    .sliceBottom = bottoms[1:length(sliceIDs)]
   )
   
   # re-name for simpler JOIN
   names(s)[1] <- hzidn
-  
-  
-  ## TODO: are keys worth the extra sorting / re-sorting?
-  # note: sorts data
-  # setkeyv(s, hzidn)
-  # consider an index, seems to have no effect
-  # setindexv(s, hzidn)
-  
+
   # FULL JOIN via fast data.table compiled code
-  # using index (?)
   res <- merge(h, s, by = hzidn, all = TRUE, sort = FALSE)
   
   ## TODO: update to := syntax, but how does it work with with variables?
@@ -231,14 +239,13 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   res[['.sliceTop']] <- NULL
   res[['.sliceBottom']] <- NULL
   
-  
   ## TODO: move z-index subset"up" to original sequence creation to save a lot of time
-  if(!is.null(z)) {
+  if (!is.null(z)) {
     res <- res[which(res[[htb[1]]] %in% z), ]
   }
   
   # slice-wise "percent missing" calculation
-  if(pctMissing) {
+  if (pctMissing) {
     
     # this is quite slow: DT -> DF -> matrix -> apply (8x longer)
     # res[['.pctMissing']] <- .pctMissing(res, vars)
@@ -257,7 +264,7 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   }
   
   # only returning horizons as a data.frame
-  if(!SPC) {
+  if (!SPC) {
     return(as.data.frame(res))
   }
   
@@ -276,8 +283,10 @@ dice <- function(x, fm = NULL, SPC = TRUE, pctMissing = FALSE, fill = FALSE, str
   OBF <- round(f.size / o.size, 1)
   metadata(x)$OBF <- OBF
   
-  # possibly informative 
-  message(sprintf("Object Bloat Factor: %s", OBF))
+  if (verbose) {
+    # turned off by default; possibly informative 
+    message(sprintf("Object Bloat Factor: %s", OBF))
+  }
   
   return(x)
 }
diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R
index 443b2cd28..623f9e1a6 100644
--- a/R/genSlabLabels.R
+++ b/R/genSlabLabels.R
@@ -1,54 +1,54 @@
 ## TODO: documentation / generalization
-# note source data must be normalixed via slice() first, e.g. all share the same number of horizons
+# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons
 
 # generate labels for slabs
-genSlabLabels <- function(slab.structure=1, max.d, n.profiles) {
+genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) {
   
   # fixed-size slabs
-  if(length(slab.structure) == 1) {
+  if (length(slab.structure) == 1) {
     # generate sequence of segment labels
-    seg.label <- rep(1:ceiling(max.d / slab.structure), each=slab.structure, length=max.d)
+    seg.label <- rep(1:ceiling(max.d / slab.structure), each = slab.structure, length = max.d)
+    
     # general segment labels
-    seg.label.levels <- tapply(1:max.d, seg.label, function(i) {r <- range(i); paste(c(r[1]-1, r[2]), collapse='-') } )
+    seg.label.levels <- tapply(1:max.d, seg.label, 
+                               function(i) {
+                                 r <- range(i, na.rm = TRUE)
+                                 paste(c(r[1] - 1, r[2]), collapse = '-')
+                               })
   }
   
   # user-defined slabs
-  if(length(slab.structure) > 1) {
-    # trival case where segments start from 0
-    if(slab.structure[1] == 0 & length(slab.structure) > 2)
-      seg.label <- rep(slab.structure[-1], times=diff(slab.structure))[1:max.d]
+  if (length(slab.structure) > 1) {
+    if (slab.structure[1] == 0 & length(slab.structure) > 2) {
+      # trivial case where segments start from 0
+      
+      
+      seg.label <- rep(slab.structure[-1], times = diff(slab.structure))[1:max.d]
     
-    # other case: user defines an arbitrary lower and upper limit
-    else {
-      if(length(slab.structure) != 2)
+    } else {
+      # other case: user defines an arbitrary lower and upper limit
+      if (length(slab.structure) != 2)
         stop('user-defined slab boundaries must either start from 0, or contain two values between 0 and the max soil depth')
       
-      # proceed
+      # calculate thickness of each slab
       slab.thickness <- diff(slab.structure)
-      # how many slices of NA before the slab?
-      padding.before <- rep(NA, times=slab.structure[1])
-      # how many slices of NA afer the slab
-      padding.after <- rep(NA, times=abs(max.d - slab.structure[2]))
+      
       # make a new label for the slab
-      new.label <- paste(slab.structure, collapse='-')
+      new.label <- paste(slab.structure, collapse = '-')
+      
       # generate an index for the slab
-      slab.idx <- rep(new.label, times=slab.thickness)
-      # generate the entire index: padding+slab+padding = total number of slices (max_d)
-      # seg.label <- c(padding.before, slab.idx, padding.after)
+      slab.idx <- rep(new.label, times = slab.thickness)
       seg.label <- slab.idx 
     }
     
     # generate segment labels	
-    seg.label.levels <- sapply(1:(length(slab.structure)-1), function(i) paste(c(slab.structure[i], slab.structure[i+1]), collapse='-'))
+    seg.label.levels <- sapply(1:(length(slab.structure) - 1), function(i) {
+      paste(c(slab.structure[i], slab.structure[i + 1]), collapse = '-')
+    })
   }
   
   # covert into a factor that can be used to split profiles into slabs
-  res <- factor(rep(seg.label, times=n.profiles), labels=seg.label.levels)
+  res <- factor(rep(seg.label, times = n.profiles), labels = seg.label.levels)
   
   return(res)
 }
-
-
-
-
-
diff --git a/R/slab.R b/R/slab.R
index 5fb326d6f..55de576ab 100644
--- a/R/slab.R
+++ b/R/slab.R
@@ -5,16 +5,14 @@
 # this type of function is compatible with aggregate()
 .slab.fun.factor.default <- function(values, cpm) {
 
-	# probabilities are relative to number of contributing profiles
-	if(cpm == 1) {
-		tb <- table(values, useNA='no')
+  if (cpm == 1) {
+	  # probabilities are relative to number of contributing profiles
+    tb <- table(values, useNA = 'no')
 		# convert to proportions
 		pt <- prop.table(tb)
-	}
-
-	# probabilities are relative to total number of profiles
-	else if(cpm == 2) {
-		tb <- table(values, useNA='always')
+	} else if (cpm == 2) {
+	  # probabilities are relative to total number of profiles
+	  tb <- table(values, useNA = 'always')
 		# convert to proportions,
 		# the last column will be named 'NA', and contains the tally of NAs --> remove it
 		pt <- prop.table(tb)
@@ -27,12 +25,10 @@
 	names(pt) <- make.names(levels(values))
 
 	return(pt)
-	}
-
+}
 
 ## TODO: make these exported functions with documentation (https://github.com/ncss-tech/aqp/issues/99)
 
-
 # default slab function for continuous variables
 # returns a named vector of results
 # this type of function is compatible with aggregate()
@@ -40,34 +36,33 @@
 	q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
 	## 2019-10-30: dropping Hmisc as a suggested package
 	# res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE)
-	res <- quantile(values, probs=q.probs, na.rm=TRUE)
-	names(res) <- paste('p.q', round(q.probs * 100), sep='')
+	res <- quantile(values, probs = q.probs, na.rm = TRUE)
+	names(res) <- paste('p.q', round(q.probs * 100), sep = '')
 	return(res)
 }
 
 # easy specification of Hmisc::hdquantile if available
 .slab.fun.numeric.HD <- function(values) {
   # sanity check, need this for color distance eval
-  if(!requireNamespace('Hmisc', quietly = TRUE))
-    stop('pleast install the `Hmisc` package.', call.=FALSE)
-
+  if (!requireNamespace('Hmisc', quietly = TRUE))
+    stop('please install the `Hmisc` package to use `hdquantile()` method', call. = FALSE)
+  
   q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-
-  res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE)
-
-  names(res) <- paste('p.q', round(q.probs * 100), sep='')
+  
+  res <- Hmisc::hdquantile(values, probs = q.probs, na.rm = TRUE)
+  
+  names(res) <- paste('p.q', round(q.probs * 100), sep = '')
   return(res)
 }
 
 # basic quantile evaluation, better for large datasets
 .slab.fun.numeric.fast <- function(values) {
 	q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-	res <- quantile(values, probs=q.probs, na.rm=TRUE)
-	names(res) <- paste('p.q', round(q.probs * 100), sep='')
+	res <- quantile(values, probs = q.probs, na.rm = TRUE)
+	names(res) <- paste('p.q', round(q.probs * 100), sep = '')
 	return(res)
 }
 
-
 # ## TODO: not yet implemented
 # # default slab function for weighted, continuous variables
 # # returns a named vector of results
@@ -80,9 +75,6 @@
 # 	return(res)
 # }
 
-
-## note: slab() uses slice() to resample to 1cm slices, to max(x) or slab.structure[2] if defined
-
 # SoilProfileCollection method
 # object: SoilProfileCollection
 # fm: formula defining aggregation
@@ -92,11 +84,14 @@
 # cpm: class probability normalization mode
 # weights: character vector naming column containing weights
 # ... : extra arguments passed on to slab.fun
-
-# this is about 40% slower than the old method
-.slab <- function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...){
-	# issue a message for now that things have changed
-	#	message('usage of slab() has changed considerably, please see the manual page for details')
+.slab <- function(object,
+                  fm,
+                  slab.structure = 1,
+                  strict = FALSE,
+                  slab.fun = .slab.fun.numeric.default,
+                  cpm = 1,
+                  weights = NULL,
+                  ...) {
 
 	# get extra arguments: length of 0 if no extra arguments
 	extra.args <- list(...)
@@ -124,102 +119,70 @@
 	vars <- all.vars(update(fm, 0~.)) # right-hand side
 
 	# sanity check:
-	if(! inherits(fm, "formula"))
-		stop('must provide a valid formula: groups ~ var1 + var2 + ...', call.=FALSE)
-
+	if (!inherits(fm, "formula"))
+	  stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. = FALSE)
+	
 	# check for bogus left/right side problems with the formula
-	if(any(g %in% object.names) == FALSE & g != '.') # bogus grouping column
-		stop('group name either missing from formula, or does match any columns in dataframe', call.=FALSE)
+	if (any(g %in% object.names) == FALSE & g != '.') # bogus grouping column
+	  stop('group name either missing from formula, or does match any columns in dataframe', call. = FALSE)
 
-	if(any(vars %in% object.names) == FALSE) # bogus column names in right-hand side
-		stop('column names in formula do not match column names in dataframe', call.=FALSE)
-
-	
-	## old slice() version ###############################
+	if (any(vars %in% object.names) == FALSE) # bogus column names in right-hand side
+	  stop('column names in formula do not match column names in dataframe', call. = FALSE)
 	
 	# make formula for slicing
-	## TODO: slice() returns an extra row, so only request slices to max-1
-	fm.slice <- formula(paste('0:', max(object)-1, ' ~ ', paste(vars, collapse=' + '), sep=''))
+	fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse = ' + '), sep = ''))
 
 	# short-cut for user-defined slab
-	## TODO: slice() returns an extra row, so only request slices to max-1
-	if(length(slab.structure) == 2 )
-		fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2]-1, ' ~ ', paste(vars, collapse=' + '), sep=''))
-
-	# slice into 1cm increments, result is a data.frame
-	data <- slice(object, fm.slice, strict=strict, just.the.data=TRUE)
-
-	#######################################################
-	
-	
-	## dice() version  ####################################
-	# https://github.com/ncss-tech/aqp/issues/115
-	
-	## TODO:
-	#  * enforce filling / check assumptions about length
-	#  * consider total re-write vs. adaptation
-	
-	# # make formula for slicing
-	# fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse=' + '), sep=''))
-	# 
-	# # short-cut for user-defined slab
-	# if(length(slab.structure) == 2 )
-	#   fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse=' + '), sep=''))
-	# 
-	# # slice into 1cm increments, result is a data.frame
-	# data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE)
-	
-	#######################################################
-	
-	
+	if (length(slab.structure) == 2) {
+	  fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse = ' + '), sep = ''))
+	}
 	
+	# slice into 1cm increments, result is a data.frame
+	data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE)
 	
 	# extract site data
 	site.data <- site(object)
 
 	# check groups for NA, if grouping variable was given
-	if(g != '.')
-		if(any(is.na(site.data[, g])))
-			stop('grouping variable must not contain NA', call.=FALSE)
-
+	if (g != '.' && any(is.na(site.data[, g]))) {
+		  stop('grouping variable must not contain NA', call. = FALSE)
+	}
+	
 	### !!! this runs out of memory when groups contains NA !!!
 	## this is generating a lot of extra objects and possibly wasting memory
 	# merge site data back into the result, this would include site-level weights
-	data <- merge(x = data, y = site.data, by=object.ID, all.x=TRUE, sort=FALSE)
+	data <- merge(x = data, y = site.data, by = object.ID, all.x = TRUE, sort = FALSE)
 
 	# clean-up
 	rm(object, site.data)
 	gc()
 
 	# check variable classes
-	if(length(vars) > 1) {
+	if (length(vars) > 1) {
 	  vars.numeric.test <- sapply(data[, vars], is.numeric)
 	}	else {
 	  vars.numeric.test <- is.numeric(data[[vars]])
 	}
 
-
 	# sanity check: all numeric, or single character/factor
-	if(any(! vars.numeric.test) & length(vars) > 1) {
+	if (any(!vars.numeric.test) & length(vars) > 1) {
 	  stop('mixed variable types and multiple categorical variables are not currently supported in the same call to slab', call. = FALSE)
 	}
 
-
 	# check for single categorical variable, and convert to factor
-	if(length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) {
+	if (length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) {
 
 		# if we have a character, then convert to factor
-		if(inherits(data[[vars]],'character')) {
+		if (inherits(data[[vars]],'character')) {
 			message('Note: converting categorical variable to factor.')
 			data[[vars]] <- factor(data[[vars]])
 		}
 
 		# check for weights
-		if(!missing(weights)) {
-		  stop('weighted aggregation of categorical variables not yet implemented', call.=FALSE)
+		if (!missing(weights)) {
+		  stop('weighted aggregation of categorical variables not yet implemented', call. = FALSE)
 		}
 
-
 		# re-set default function, currently no user-supply-able option
 		slab.fun <- .slab.fun.factor.default
 
@@ -236,94 +199,70 @@
 	  .factorFlag <- FALSE
 	}
 
-
-	####
-	#### optimization note: use of factor labels could be slowing things down...
-	####
+	## Note: use of factor labels could be slowing things down...
 	## Note: this assumes ordering is correct in source data / segment labels
+	
 	## TODO: make sure that nrow(data) == genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles)
+	##       - I think the source of problems related to genSlabLabels not conforming with number of rows has been fixed -AGB 2022/08/17
+	
 	## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF:
-	##
-	##         l <- split(data, data$seg.label, drop=FALSE)
-	##
-	## ... parallel processing with furrr
+	##       l <- split(data, data$seg.label, drop=FALSE)
 
 	# add segmenting label to data
  	data$seg.label <- genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles)
 
 	# if there is no left-hand component in the formula, we are aggregating all data in the collection
-	if(g == '.') {
+	if (g == '.') {
 		g <- 'all.profiles' # add new grouping variable to horizons
 		data[, g] <- 1
 	}
 
-
-	##
-	## TODO: adding weighted-aggregate functionality here
-	## we can't use aggregate() for this
-	## we can use data.table methods
-
 	# check for weights
-	if(!missing(weights))
-		stop('weighted aggregation is not yet supported', call.=FALSE)
-
+	if (!missing(weights)) {
+  	## TODO: adding weighted aggregation via data.table
+	  stop('weighted aggregation is not yet supported', call. = FALSE)
+	}
+ 	
  	## TODO: why would this ever happen?
 	# throwing out those rows with an NA segment label
 	seg.label.is.not.NA <- which(!is.na(data$seg.label))
 
-	# convert into long format
-	# d.long.df <- reshape::melt(data[seg.label.is.not.NA, ], id.vars=c(object.ID, 'seg.label', g), measure.vars=vars)
-
-	# convert wide -> long format
-	# using data.table::melt()
-	# note that this will not preserve factor levels when 'vars' is categorical
-	# must call unique() on `id.vars`
+	# convert wide -> long format w/ data.table::melt()
+	# will not preserve factor levels when 'vars' is categorical;  must call unique() on `id.vars`
 	d.long <- melt(
 	  as.data.table(data[seg.label.is.not.NA, ]),
 	  id.vars = unique(c(object.ID, 'seg.label', g)),
 	  measure.vars = vars,
-	  )
+	 )
 
 	# convert back to data.frame
 	d.long <- as.data.frame(d.long)
 
   # reset factor levels in d.long[[value]]
-	if(.factorFlag) {
+	if (.factorFlag) {
 	  d.long[['value']] <- factor(d.long[['value']], levels = original.levels)
 	}
 
 	# make a formula for aggregate()
-	aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep=''))
-
-	##
-	## TODO: this might be the place to implement parallel code in furrr
-	##       1. split into a list based on number of cores/cpus available
-	##       2. aggregate using seg.label + variable in parallel
-	##       3. combine results (a list of data.frames)
-	##
-	## NOPE: use data.table which will automatically scale to multiple threads
-	##
-
-
+	aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = ''))
 
 	# process chunks according to group -> variable -> segment
 	# NA values are not explicitly dropped
-	if(length(extra.args) == 0)
-		d.slabbed <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun)
-
-	# optionally account for extra arguments
-	else {
-		the.args <- c(list(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun), extra.args)
-		d.slabbed <- do.call(what='aggregate', args=the.args)
+	if (length(extra.args) == 0) {
+	  d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun)
+	} else {
+	  # optionally account for extra arguments
+	  the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args)
+		d.slabbed <- do.call(what = 'aggregate', args = the.args)
 	}
 
-
 	# if slab.fun returns a vector of length > 1 we must:
 	# convert the complex data.frame returned by aggregate into a regular data.frame
 	# the column 'value' is a matrix with the results of slab.fun
-	if(inherits(d.slabbed$value, 'matrix'))
+	if (inherits(d.slabbed$value, 'matrix')) {
 		d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value)
-
+	}
+	
   # ensure that the names returned from slab.fun are legal
   names(d.slabbed) <- make.names(names(d.slabbed))
 
@@ -333,8 +272,14 @@
 	d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2]))
 
 	# estimate contributing fraction
-	d.slabbed$contributing_fraction <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=function(i) {length(na.omit(i)) / length(i)})$value
-
+	d.slabbed$contributing_fraction <- aggregate(
+	    aggregate.fm,
+	    data = d.long,
+	    na.action = na.pass,
+	    FUN = function(i) {
+	      length(na.omit(i)) / length(i)
+	    }
+	  )$value
 
 	# remove seg.label from result
 	d.slabbed$seg.label <- NULL
@@ -342,21 +287,26 @@
   # if categorical data have been aggregated, set an attribute with the original levels
   # this allows one to recover values that are not legal column names
   # e.g. 2Bt is corrupted to X2Bt
-  if(! is.null(original.levels))
+  if (!is.null(original.levels)) {
     attr(d.slabbed, 'original.levels') <- original.levels
-
-	# reset options:
+  }
+	
+	# reset options
 	options(opt.original)
-
-	# done
 	return(d.slabbed)
 }
 
 # setup generic function
-# if (!isGeneric("slab"))
-	setGeneric("slab", function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...) standardGeneric("slab"))
+setGeneric("slab", function(object,
+                              fm,
+                              slab.structure = 1,
+                              strict = FALSE,
+                              slab.fun = .slab.fun.numeric.default,
+                              cpm = 1,
+                              weights = NULL,
+                              ...)
+    standardGeneric("slab"))
 
-# method dispatch
 #' Slab-Wise Aggregation of SoilProfileCollection Objects
 #'
 #' Aggregate soil properties along user-defined `slabs`, and optionally within
@@ -373,6 +323,8 @@
 #' \code{stats::quantile} from the Hmisc package, which requires at least 2
 #' observations per chunk. Note that if `group` is a factor it must not contain
 #' NAs.
+#' 
+#' `slab()` uses `dice()` to "resample" profiles to 1cm slices from depth 0 to `max(x)` (or `slab.structure[2]`, if defined).
 #'
 #' Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate.
 #' This is performed by specifying \code{identity} in \code{slab.fun}. See
@@ -526,7 +478,6 @@
 #'        scales=list(x=list(alternating=1))
 #' )
 #'
-#'
 #' ##
 #' ## categorical variable example
 #' ##
@@ -780,6 +731,7 @@
 #'
 #'
 #'
+# # TODO: update this when weights are implemented
 #' ## weighted-aggregation -- NOT YET IMPLEMENTED --
 #' # load sample data, upgrade to SoilProfileCollection
 #' data(sp1)
@@ -790,4 +742,6 @@
 #' sp1$site.wts <- runif(n=length(sp1), min=20, max=100)
 #' }
 #'
-setMethod(f='slab', signature='SoilProfileCollection', definition=.slab)
+setMethod(f = 'slab',
+          signature = 'SoilProfileCollection',
+          definition = .slab)

From 05fa8fc10ea8827a1a87967addab062f40277fd8 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Wed, 17 Aug 2022 17:11:48 -0700
Subject: [PATCH 2/7] dice docs

---
 R/dice.R    | 2 ++
 man/dice.Rd | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/dice.R b/R/dice.R
index 390768623..60335e783 100644
--- a/R/dice.R
+++ b/R/dice.R
@@ -27,6 +27,8 @@
 #' 
 #' @param byhz Evaluate horizon depth logic at the horizon level (`TRUE`) or profile level (`FALSE`). Invalid depth logic invokes `HzDepthLogicSubset` which removes offending profiles or horizon records.
 #' 
+#' @param verbose Print information about object size/memory usage. Default: `FALSE`
+#' 
 #' @details For large and potentially messy collections that include missing horizon bottom depths, or 0-thickness horions, consider using `repairMissingHzDepths()` before `dice()`.
 #' 
 #'
diff --git a/man/dice.Rd b/man/dice.Rd
index 6ac60c727..ce17dafe0 100644
--- a/man/dice.Rd
+++ b/man/dice.Rd
@@ -11,7 +11,8 @@ dice(
   pctMissing = FALSE,
   fill = FALSE,
   strict = TRUE,
-  byhz = FALSE
+  byhz = FALSE,
+  verbose = FALSE
 )
 }
 \arguments{
@@ -28,6 +29,8 @@ dice(
 \item{strict}{perform horizon depth logic checking / flagging / removal}
 
 \item{byhz}{Evaluate horizon depth logic at the horizon level (\code{TRUE}) or profile level (\code{FALSE}). Invalid depth logic invokes \code{HzDepthLogicSubset} which removes offending profiles or horizon records.}
+
+\item{verbose}{Print information about object size/memory usage. Default: \code{FALSE}}
 }
 \value{
 a \code{SoilProfileCollection} object, or \code{data.frame} when \code{SPC = FALSE}

From 8cd031179e80c87be11b425d758ed6bea0b4ea36 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Wed, 17 Aug 2022 17:01:43 -0700
Subject: [PATCH 3/7] `slab()`: implement `weights` argument

 - Add `slab_function()`
---
 R/slab.R                   | 236 +++++++++++++++++++------------------
 man/slab-methods.Rd        |  16 ++-
 tests/testthat/test-slab.R |  27 ++++-
 3 files changed, 163 insertions(+), 116 deletions(-)

diff --git a/R/slab.R b/R/slab.R
index 55de576ab..2639f2b01 100644
--- a/R/slab.R
+++ b/R/slab.R
@@ -1,5 +1,3 @@
-
-
 # default slab function for categorical variables
 # returns a named vector of results
 # this type of function is compatible with aggregate()
@@ -27,54 +25,54 @@
 	return(pt)
 }
 
-## TODO: make these exported functions with documentation (https://github.com/ncss-tech/aqp/issues/99)
-
+.slab.fun.factor.weighted <- function(values, weights) {
+  # TODO
+}
+  
 # default slab function for continuous variables
 # returns a named vector of results
 # this type of function is compatible with aggregate()
-.slab.fun.numeric.default <- function(values) {
-	q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-	## 2019-10-30: dropping Hmisc as a suggested package
-	# res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE)
-	res <- quantile(values, probs = q.probs, na.rm = TRUE)
-	names(res) <- paste('p.q', round(q.probs * 100), sep = '')
-	return(res)
+.slab.fun.numeric.default <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
+  .slab.fun.numeric.fast(values, probs = probs)
+}
+
+# for a site or horizon level weight column, use Hmisc::wtd.quantile
+#   note that "frequency weights" are assumed to be most common use case, so normwt=FALSE by default, 
+#   normwt=TRUE can be passed as optional argument for slab.fun from slab() interface
+.slab.fun.numeric.weighted <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), wt, normwt = FALSE) {
+  if (!requireNamespace('Hmisc', quietly = TRUE))
+    stop('please install the `Hmisc` package to use `wtd.quantile()` method', call. = FALSE)
+  res <- try(Hmisc::wtd.quantile(values, weights = wt, probs = probs, na.rm = TRUE, normwt = normwt), silent = TRUE)
+  if (inherits(res, 'try-error') && grepl("zero non-NA points", res[1])) {
+    res <- rep(NA_real_, length(probs))
+  } else if (inherits(res, 'try-error')) {
+    print(res)
+    return(NULL)
+  } else {
+    names(res) <- paste('p.q', round(probs * 100), sep = '')
+  }
+  return(res)
 }
 
 # easy specification of Hmisc::hdquantile if available
-.slab.fun.numeric.HD <- function(values) {
+.slab.fun.numeric.HD <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
   # sanity check, need this for color distance eval
   if (!requireNamespace('Hmisc', quietly = TRUE))
     stop('please install the `Hmisc` package to use `hdquantile()` method', call. = FALSE)
   
-  q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-  
-  res <- Hmisc::hdquantile(values, probs = q.probs, na.rm = TRUE)
+  res <- Hmisc::hdquantile(values, probs = probs, na.rm = TRUE)
   
-  names(res) <- paste('p.q', round(q.probs * 100), sep = '')
+  names(res) <- paste('p.q', round(probs * 100), sep = '')
   return(res)
 }
 
-# basic quantile evaluation, better for large datasets
-.slab.fun.numeric.fast <- function(values) {
-	q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-	res <- quantile(values, probs = q.probs, na.rm = TRUE)
-	names(res) <- paste('p.q', round(q.probs * 100), sep = '')
-	return(res)
+# basic quantile evaluation, better for large data sets
+.slab.fun.numeric.fast <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
+  res <- quantile(values, probs = probs, na.rm = TRUE)
+  names(res) <- paste('p.q', round(probs * 100), sep = '')
+  return(res)
 }
 
-# ## TODO: not yet implemented
-# # default slab function for weighted, continuous variables
-# # returns a named vector of results
-# # this type of function is compatible with aggregate()
-# # NOTE: nw (normalize-weights) argument will affect the results!
-# .slab.fun.wtd.numeric.default <- function(values, w, nw=TRUE) {
-# 	q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
-# 	res <- wtd.quantile(values, weights=w, probs=q.probs, na.rm=TRUE, normwt=nw)
-# 	names(res) <- paste('p.q', round(q.probs * 100), sep='')
-# 	return(res)
-# }
-
 # SoilProfileCollection method
 # object: SoilProfileCollection
 # fm: formula defining aggregation
@@ -82,23 +80,23 @@
 # progress: plyr-progress display
 # slab.fun: aggregate function applied to data chunks (must return a single row / chunk)
 # cpm: class probability normalization mode
-# weights: character vector naming column containing weights
+# weights: character vector naming column in site slot containing weights
 # ... : extra arguments passed on to slab.fun
 .slab <- function(object,
                   fm,
                   slab.structure = 1,
                   strict = FALSE,
-                  slab.fun = .slab.fun.numeric.default,
+                  slab.fun = slab_function(method = "numeric"),
                   cpm = 1,
                   weights = NULL,
                   ...) {
 
+  # define keywords for data.table
+  .SD <- NULL; .N <- NULL
+  
 	# get extra arguments: length of 0 if no extra arguments
 	extra.args <- list(...)
 
-	## important: change the default behavior of data.frame and melt
-	opt.original <- options(stringsAsFactors = FALSE)
-
 	# get unique list of names in object
 	object.names <- unique(unlist(names(object)))
 
@@ -117,7 +115,7 @@
 	# extract components of the formula:
 	g <- all.vars(update(fm, .~0)) # left-hand side
 	vars <- all.vars(update(fm, 0~.)) # right-hand side
-
+	
 	# sanity check:
 	if (!inherits(fm, "formula"))
 	  stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. = FALSE)
@@ -145,18 +143,12 @@
 
 	# check groups for NA, if grouping variable was given
 	if (g != '.' && any(is.na(site.data[, g]))) {
-		  stop('grouping variable must not contain NA', call. = FALSE)
+		stop('grouping variable must not contain NA', call. = FALSE)
 	}
 	
-	### !!! this runs out of memory when groups contains NA !!!
-	## this is generating a lot of extra objects and possibly wasting memory
 	# merge site data back into the result, this would include site-level weights
 	data <- merge(x = data, y = site.data, by = object.ID, all.x = TRUE, sort = FALSE)
 
-	# clean-up
-	rm(object, site.data)
-	gc()
-
 	# check variable classes
 	if (length(vars) > 1) {
 	  vars.numeric.test <- sapply(data[, vars], is.numeric)
@@ -180,7 +172,7 @@
 
 		# check for weights
 		if (!missing(weights)) {
-		  stop('weighted aggregation of categorical variables not yet implemented', call. = FALSE)
+		  slab.fun <- .slab.fun.factor.weighted
 		}
 
 		# re-set default function, currently no user-supply-able option
@@ -216,71 +208,75 @@
 		g <- 'all.profiles' # add new grouping variable to horizons
 		data[, g] <- 1
 	}
-
+ 	
+ 	# convert wide -> long format
+ 	d.long <- data.table::melt(
+ 	  as.data.table(data[which(!is.na(data$seg.label)), ]),
+ 	  id.vars = unique(c(object.ID, 'seg.label', g, weights)),
+ 	  measure.vars = vars
+ 	)
+ 	
+ 	# make a formula for aggregate()
+ 	aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = ''))
+ 	
 	# check for weights
 	if (!missing(weights)) {
-  	## TODO: adding weighted aggregation via data.table
-	  stop('weighted aggregation is not yet supported', call. = FALSE)
-	}
- 	
- 	## TODO: why would this ever happen?
-	# throwing out those rows with an NA segment label
-	seg.label.is.not.NA <- which(!is.na(data$seg.label))
-
-	# convert wide -> long format w/ data.table::melt()
-	# will not preserve factor levels when 'vars' is categorical;  must call unique() on `id.vars`
-	d.long <- melt(
-	  as.data.table(data[seg.label.is.not.NA, ]),
-	  id.vars = unique(c(object.ID, 'seg.label', g)),
-	  measure.vars = vars,
-	 )
-
-	# convert back to data.frame
-	d.long <- as.data.frame(d.long)
-
-  # reset factor levels in d.long[[value]]
-	if (.factorFlag) {
-	  d.long[['value']] <- factor(d.long[['value']], levels = original.levels)
-	}
-
-	# make a formula for aggregate()
-	aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = ''))
+	  if (missing(slab.fun)) {
+	    # default _weighted_ aggregation fun is .slab.fun.numeric.weighted
+	    FUN <- .slab.fun.numeric.weighted
+	    # custom weighted aggregation fun should take first argument as values w. named "weights" argument
+	  } else FUN <- slab.fun
+	  
+	  d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, wt = weights))), by = c('variable', g, 'seg.label')])
+	  d.slabbed$variable
+	  d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1
 
-	# process chunks according to group -> variable -> segment
-	# NA values are not explicitly dropped
-	if (length(extra.args) == 0) {
-	  d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun)
 	} else {
-	  # optionally account for extra arguments
-	  the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args)
-		d.slabbed <- do.call(what = 'aggregate', args = the.args)
+	  # convert back to data.frame
+	  d.long <- as.data.frame(d.long)
+	  
+	  # reset factor levels in d.long[[value]]
+	  if (.factorFlag) {
+	    d.long[['value']] <- factor(d.long[['value']], levels = original.levels)
+	  }
+	  
+	  # process chunks according to group -> variable -> segment
+	  # NA values are not explicitly dropped
+	  if (length(extra.args) == 0) {
+	    d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun)
+	  } else {
+	    # optionally account for extra arguments
+	    the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args)
+	    d.slabbed <- do.call(what = 'aggregate', args = the.args)
+	  }
+	  
+	  # if slab.fun returns a vector of length > 1 we must:
+	  # convert the complex data.frame returned by aggregate into a regular data.frame
+	  # the column 'value' is a matrix with the results of slab.fun
+	  if (inherits(d.slabbed$value, 'matrix')) {
+	    d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value)
+	  }
+	  
+	  # ensure that the names returned from slab.fun are legal
+	  names(d.slabbed) <- make.names(names(d.slabbed)) 
+	  
+   	# estimate contributing fraction
+   	d.slabbed$contributing_fraction <- aggregate(
+   	  aggregate.fm,
+   	  data = d.long,
+   	  na.action = na.pass,
+   	  FUN = function(i) {
+   	    length(na.omit(i)) / length(i)
+   	  }
+   	)$value
+   	
 	}
-
-	# if slab.fun returns a vector of length > 1 we must:
-	# convert the complex data.frame returned by aggregate into a regular data.frame
-	# the column 'value' is a matrix with the results of slab.fun
-	if (inherits(d.slabbed$value, 'matrix')) {
-		d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value)
-	}
-	
-  # ensure that the names returned from slab.fun are legal
-  names(d.slabbed) <- make.names(names(d.slabbed))
-
-	# convert the slab.label column into top/bottom as integers
-	slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-')
-	d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1]))
-	d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2]))
-
-	# estimate contributing fraction
-	d.slabbed$contributing_fraction <- aggregate(
-	    aggregate.fm,
-	    data = d.long,
-	    na.action = na.pass,
-	    FUN = function(i) {
-	      length(na.omit(i)) / length(i)
-	    }
-	  )$value
-
+ 	
+ 	# convert the slab.label column into top/bottom as integers
+ 	slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-')
+ 	d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1]))
+ 	d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2]))
+ 	
 	# remove seg.label from result
 	d.slabbed$seg.label <- NULL
 
@@ -291,8 +287,6 @@
     attr(d.slabbed, 'original.levels') <- original.levels
   }
 	
-	# reset options
-	options(opt.original)
 	return(d.slabbed)
 }
 
@@ -301,7 +295,7 @@ setGeneric("slab", function(object,
                               fm,
                               slab.structure = 1,
                               strict = FALSE,
-                              slab.fun = .slab.fun.numeric.default,
+                              slab.fun = slab_function(method = "numeric"),
                               cpm = 1,
                               weights = NULL,
                               ...)
@@ -368,7 +362,7 @@ setGeneric("slab", function(object,
 #' either: number of profiles with data at the current slice (cpm=1), or by the
 #' number of profiles in the collection (cpm=2). Mode 1 values will always sum
 #' to the contributing fraction, while mode 2 values will always sum to 1.
-#' @param weights Column name containing weights. NOT YET IMPLEMENTED
+#' @param weights Column name containing site-level weights
 #' @param \dots further arguments passed to \code{slab.fun}
 #' @return Output is returned in long format, such that slice-wise aggregates
 #' are returned once for each combination of grouping level (optional),
@@ -745,3 +739,21 @@ setGeneric("slab", function(object,
 setMethod(f = 'slab',
           signature = 'SoilProfileCollection',
           definition = .slab)
+
+#' @param method one of `"numeric"`, `"factor"`, `"hd"`, `"weighted.numeric"`, `"weighted.factor"`, `"fast"`
+#' @details `slab_function()`: The default `"numeric"` aggregation method is the `"fast"` numeric (quantile) method. Additional methods include `"factor"` for categorical data, `"hd"` to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "`weighted`" to use a weighted quantile method from the Hmisc package
+#' @return `slab_function()`: return an aggregation function based on the `method` argument
+#' @rdname slab-methods
+#' @export
+slab_function <- function(method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast")) {
+  switch(method,
+         numeric = .slab.fun.numeric.default, # uses "fast" numeric method
+         factor = .slab.fun.factor.default, 
+         weighted.factor = .slab.fun.factor.weighted, 
+         hd = .slab.fun.numeric.HD, 
+         weighted.numeric = .slab.fun.numeric.weighted,
+         weighted.factor = .slab.fun.numeric.weighted,
+         fast = .slab.fun.numeric.fast,
+         .slab.fun.numeric.default
+  )
+}
diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd
index 305a43998..d0a626b95 100644
--- a/man/slab-methods.Rd
+++ b/man/slab-methods.Rd
@@ -7,6 +7,7 @@
 \alias{slab2}
 \alias{genSlabLabels}
 \alias{slab,SoilProfileCollection-method}
+\alias{slab_function}
 \title{Slab-Wise Aggregation of SoilProfileCollection Objects}
 \usage{
 \S4method{slab}{SoilProfileCollection}(
@@ -14,11 +15,13 @@
   fm,
   slab.structure = 1,
   strict = FALSE,
-  slab.fun = .slab.fun.numeric.default,
+  slab.fun = slab_function(method = "default"),
   cpm = 1,
   weights = NULL,
   ...
 )
+
+slab_function(method = c("default", "factor", "hd", "weighted", "fast"))
 }
 \arguments{
 \item{object}{a SoilProfileCollection}
@@ -42,9 +45,11 @@ either: number of profiles with data at the current slice (cpm=1), or by the
 number of profiles in the collection (cpm=2). Mode 1 values will always sum
 to the contributing fraction, while mode 2 values will always sum to 1.}
 
-\item{weights}{Column name containing weights. NOT YET IMPLEMENTED}
+\item{weights}{Column name containing site-level weights}
 
 \item{\dots}{further arguments passed to \code{slab.fun}}
+
+\item{method}{one of \code{"default"}, \code{"factor"}, \code{"hd"}, \code{"weighted"}, \code{"fast"}}
 }
 \value{
 Output is returned in long format, such that slice-wise aggregates
@@ -74,6 +79,8 @@ fraction of profiles contributing to the aggregate value, ranges from
 1/n_profiles to 1.}
 
 }
+
+\code{slab_function()}: return an aggregation function based on the \code{method} argument
 }
 \description{
 Aggregate soil properties along user-defined \code{slabs}, and optionally within
@@ -92,6 +99,8 @@ mean, mean-1SD and mean+1SD. The default slab function wraps
 observations per chunk. Note that if \code{group} is a factor it must not contain
 NAs.
 
+\code{slab()} uses \code{dice()} to "resample" profiles to 1cm slices from depth 0 to \code{max(x)} (or \code{slab.structure[2]}, if defined).
+
 Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate.
 This is performed by specifying \code{identity} in \code{slab.fun}. See
 examples beflow for a demonstration of this functionality.
@@ -114,6 +123,8 @@ integers}{e.g. c(50, 60): data are aggregated over depths spanning 50--60
 units} \item{a vector of 3 or more integers}{e.g. c(0, 5, 10, 50, 100): data
 are aggregated over the depths spanning 0--5, 5--10, 10--50, 50--100 units}
 }
+
+\code{slab_function()}: The \code{"default"} aggregation method is the \code{"fast"} numeric (quantile) method. Additional methods include \code{"factor"} for categorical data, \code{"hd"} to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "\code{weighted}" to use a weighted quantile method from the Hmisc package
 }
 \note{
 Arguments to \code{slab} have changed with \code{aqp} 1.5 (2012-12-29)
@@ -191,7 +202,6 @@ xyplot(top ~ value | id, data=a, ylab='Depth',
        scales=list(x=list(alternating=1))
 )
 
-
 ##
 ## categorical variable example
 ##
diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R
index c45537f79..af455668d 100644
--- a/tests/testthat/test-slab.R
+++ b/tests/testthat/test-slab.R
@@ -35,7 +35,32 @@ test_that("basic slab functionality", {
   expect_true(any(grepl('p.q', nm)))
 })
 
-
+test_that("extended slab functionality: weighted aggregation", {
+  
+  data(sp1, package = 'aqp')
+  depths(sp1) <- id ~ top + bottom
+  
+  sp1$weights <- rep(1:2, length(sp1))[1:length(sp1)]
+  sp1$wtgrp <- rep(1, length(sp1))
+  
+  # we expect quantile estimates to vary (given weighted v.s. unweighted)
+  a.0 <- slab(sp1, fm = ~ prop, weights = "weights")
+  a.1 <- slab(sp1, fm = ~ prop, strict = TRUE, weights = "weights")
+  a.2 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE, weights = "weights")
+  a.3 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE)
+  
+  # expect consistent structure for weighted/unweighted: same column names except for group
+  ungroupcols <- colnames(a.3)
+  ungroupcols[2] <- "all.profiles"
+  expect_equal(colnames(a.0), ungroupcols)
+  expect_equal(colnames(a.1), ungroupcols)
+  expect_equal(colnames(a.2), colnames(a.3))
+  
+  # contributing fractions should be identical
+  expect_true(all(a.1$contributing_fraction == a.2$contributing_fraction))
+  expect_true(all(a.2$contributing_fraction == a.3$contributing_fraction))
+  
+})
 
 test_that("slab calculations: mean, single profile", {
   

From 2cb89ca409155f48a4df8c103e7c21ca85d1054d Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Thu, 18 Aug 2022 11:42:46 -0700
Subject: [PATCH 4/7] cleanup / fixing edge cases / more tests

---
 R/genSlabLabels.R          |  6 ++----
 R/slab.R                   | 23 +++++++++++++----------
 man/slab-methods.Rd        | 10 ++++++----
 tests/testthat/test-slab.R | 28 ++++++++++++++++++++++++++--
 4 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R
index 623f9e1a6..f5c234ee8 100644
--- a/R/genSlabLabels.R
+++ b/R/genSlabLabels.R
@@ -22,7 +22,6 @@ genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) {
     if (slab.structure[1] == 0 & length(slab.structure) > 2) {
       # trivial case where segments start from 0
       
-      
       seg.label <- rep(slab.structure[-1], times = diff(slab.structure))[1:max.d]
     
     } else {
@@ -30,15 +29,14 @@ genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) {
       if (length(slab.structure) != 2)
         stop('user-defined slab boundaries must either start from 0, or contain two values between 0 and the max soil depth')
       
-      # calculate thickness of each slab
+      # calculate thickness of slab
       slab.thickness <- diff(slab.structure)
       
       # make a new label for the slab
       new.label <- paste(slab.structure, collapse = '-')
       
-      # generate an index for the slab
       slab.idx <- rep(new.label, times = slab.thickness)
-      seg.label <- slab.idx 
+      seg.label <- slab.idx
     }
     
     # generate segment labels	
diff --git a/R/slab.R b/R/slab.R
index 2639f2b01..312a5c8cd 100644
--- a/R/slab.R
+++ b/R/slab.R
@@ -92,7 +92,7 @@
                   ...) {
 
   # define keywords for data.table
-  .SD <- NULL; .N <- NULL
+  .SD <- NULL; .N <- NULL; value <- NULL
   
 	# get extra arguments: length of 0 if no extra arguments
 	extra.args <- list(...)
@@ -113,8 +113,8 @@
 	original.levels <- NULL
 
 	# extract components of the formula:
-	g <- all.vars(update(fm, .~0)) # left-hand side
-	vars <- all.vars(update(fm, 0~.)) # right-hand side
+	g <- all.vars(update(fm, . ~ 0)) # left-hand side
+	vars <- all.vars(update(fm, 0 ~ .)) # right-hand side
 	
 	# sanity check:
 	if (!inherits(fm, "formula"))
@@ -128,15 +128,22 @@
 	  stop('column names in formula do not match column names in dataframe', call. = FALSE)
 	
 	# make formula for slicing
-	fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse = ' + '), sep = ''))
-
-	# short-cut for user-defined slab
 	if (length(slab.structure) == 2) {
+  	# user-defined single slab
 	  fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse = ' + '), sep = ''))
+	} else {
+	  fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse = ' + '), sep = ''))
 	}
 	
 	# slice into 1cm increments, result is a data.frame
 	data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE)
+  
+	# Note: in this case we need to subtract the extra slice included by slice()/dice()
+  # do it to sliced result so that the genSlabLabels have the correct length
+  ldx <- data[[horizonDepths(object)[2]]] > slab.structure[2]
+	if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) {
+	  data <- data[which(!ldx), ]  
+	}
 	
 	# extract site data
 	site.data <- site(object)
@@ -193,10 +200,6 @@
 
 	## Note: use of factor labels could be slowing things down...
 	## Note: this assumes ordering is correct in source data / segment labels
-	
-	## TODO: make sure that nrow(data) == genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles)
-	##       - I think the source of problems related to genSlabLabels not conforming with number of rows has been fixed -AGB 2022/08/17
-	
 	## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF:
 	##       l <- split(data, data$seg.label, drop=FALSE)
 
diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd
index d0a626b95..618c6cde1 100644
--- a/man/slab-methods.Rd
+++ b/man/slab-methods.Rd
@@ -15,13 +15,15 @@
   fm,
   slab.structure = 1,
   strict = FALSE,
-  slab.fun = slab_function(method = "default"),
+  slab.fun = slab_function(method = "numeric"),
   cpm = 1,
   weights = NULL,
   ...
 )
 
-slab_function(method = c("default", "factor", "hd", "weighted", "fast"))
+slab_function(
+  method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast")
+)
 }
 \arguments{
 \item{object}{a SoilProfileCollection}
@@ -49,7 +51,7 @@ to the contributing fraction, while mode 2 values will always sum to 1.}
 
 \item{\dots}{further arguments passed to \code{slab.fun}}
 
-\item{method}{one of \code{"default"}, \code{"factor"}, \code{"hd"}, \code{"weighted"}, \code{"fast"}}
+\item{method}{one of \code{"numeric"}, \code{"factor"}, \code{"hd"}, \code{"weighted.numeric"}, \code{"weighted.factor"}, \code{"fast"}}
 }
 \value{
 Output is returned in long format, such that slice-wise aggregates
@@ -124,7 +126,7 @@ units} \item{a vector of 3 or more integers}{e.g. c(0, 5, 10, 50, 100): data
 are aggregated over the depths spanning 0--5, 5--10, 10--50, 50--100 units}
 }
 
-\code{slab_function()}: The \code{"default"} aggregation method is the \code{"fast"} numeric (quantile) method. Additional methods include \code{"factor"} for categorical data, \code{"hd"} to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "\code{weighted}" to use a weighted quantile method from the Hmisc package
+\code{slab_function()}: The default \code{"numeric"} aggregation method is the \code{"fast"} numeric (quantile) method. Additional methods include \code{"factor"} for categorical data, \code{"hd"} to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "\code{weighted}" to use a weighted quantile method from the Hmisc package
 }
 \note{
 Arguments to \code{slab} have changed with \code{aqp} 1.5 (2012-12-29)
diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R
index af455668d..fc2993e9f 100644
--- a/tests/testthat/test-slab.R
+++ b/tests/testthat/test-slab.R
@@ -1,7 +1,5 @@
 context("slab method for SoilProfileCollection objects")
 
-
-
 test_that("basic slab functionality", {
   
   data(sp1, package = 'aqp')
@@ -35,6 +33,32 @@ test_that("basic slab functionality", {
   expect_true(any(grepl('p.q', nm)))
 })
 
+test_that("slab structure for a single slab", {
+  data(sp4, package = 'aqp')
+  depths(sp4) <- id ~ top + bottom
+  
+  sp4$group <- c(rep('A',5), rep('B',5))
+  a.1 <- slab(
+    sp4,
+    fm = ~ sand + silt + clay,
+    slab.structure = c(0, 10),
+    slab.fun = mean,
+    na.rm = TRUE
+  )
+  expect_equal(nrow(a.1), 3)
+  
+  # again, this time within groups defined by a site-level attribute:
+  a.1 <- slab(
+    sp4,
+    fm = group ~ sand + silt + clay,
+    slab.structure = c(0, 10),
+    slab.fun = mean,
+    na.rm = TRUE
+  )
+  expect_equal(nrow(a.1), 6)
+})
+
+
 test_that("extended slab functionality: weighted aggregation", {
   
   data(sp1, package = 'aqp')

From 669fc9a239065db86fd3ca813386b34de4178513 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Thu, 18 Aug 2022 14:56:28 -0700
Subject: [PATCH 5/7] fixweighted average/quantile methods  - add test of
 "component weighted averages"

---
 R/slab.R                   | 41 ++++++++++++++++++++----------
 tests/testthat/test-slab.R | 52 ++++++++++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/R/slab.R b/R/slab.R
index 312a5c8cd..600984d56 100644
--- a/R/slab.R
+++ b/R/slab.R
@@ -25,7 +25,7 @@
 	return(pt)
 }
 
-.slab.fun.factor.weighted <- function(values, weights) {
+.slab.fun.factor.weighted <- function(values, w) {
   # TODO
 }
   
@@ -39,15 +39,12 @@
 # for a site or horizon level weight column, use Hmisc::wtd.quantile
 #   note that "frequency weights" are assumed to be most common use case, so normwt=FALSE by default, 
 #   normwt=TRUE can be passed as optional argument for slab.fun from slab() interface
-.slab.fun.numeric.weighted <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), wt, normwt = FALSE) {
+.slab.fun.numeric.weighted <- function(values, w, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), normwt = FALSE) {
   if (!requireNamespace('Hmisc', quietly = TRUE))
     stop('please install the `Hmisc` package to use `wtd.quantile()` method', call. = FALSE)
-  res <- try(Hmisc::wtd.quantile(values, weights = wt, probs = probs, na.rm = TRUE, normwt = normwt), silent = TRUE)
+  res <- try(Hmisc::wtd.quantile(values, w, probs = probs, na.rm = TRUE, normwt = normwt), silent = TRUE)
   if (inherits(res, 'try-error') && grepl("zero non-NA points", res[1])) {
     res <- rep(NA_real_, length(probs))
-  } else if (inherits(res, 'try-error')) {
-    print(res)
-    return(NULL)
   } else {
     names(res) <- paste('p.q', round(probs * 100), sep = '')
   }
@@ -180,11 +177,11 @@
 		# check for weights
 		if (!missing(weights)) {
 		  slab.fun <- .slab.fun.factor.weighted
+		} else {
+  		# re-set default function, currently no user-supply-able option
+  		slab.fun <- .slab.fun.factor.default
 		}
 
-		# re-set default function, currently no user-supply-able option
-		slab.fun <- .slab.fun.factor.default
-
 		# add extra arguments required by this function
 		# note that we cannot accept additional arguments when processing categorical values
 		extra.args <- list(cpm = cpm)
@@ -212,6 +209,14 @@
 		data[, g] <- 1
 	}
  	
+ 	if (length(weights) > 1) {
+ 	  stop("`weights` argument should be a character vector of length one containing (site-level) weight column name", call. = FALSE)
+ 	}
+ 	
+ 	if (!is.null(weights) && !weights %in% siteNames(object)) {
+ 	  stop("column '", weights, "' not present in site data", call. = FALSE)
+ 	}
+ 	
  	# convert wide -> long format
  	d.long <- data.table::melt(
  	  as.data.table(data[which(!is.na(data$seg.label)), ]),
@@ -227,11 +232,13 @@
 	  if (missing(slab.fun)) {
 	    # default _weighted_ aggregation fun is .slab.fun.numeric.weighted
 	    FUN <- .slab.fun.numeric.weighted
-	    # custom weighted aggregation fun should take first argument as values w. named "weights" argument
-	  } else FUN <- slab.fun
-	  
-	  d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, wt = weights))), by = c('variable', g, 'seg.label')])
-	  d.slabbed$variable
+	    # custom weighted aggregation fun should take first argument as values 
+	    # second argument is "weights" argument
+	  } else {
+	    FUN <- slab.fun
+	  }
+	  wt <- eval(weights)
+	  d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]]))), by = c('variable', g, 'seg.label')])
 	  d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1
 
 	} else {
@@ -275,6 +282,12 @@
    	
 	}
  	
+ 	# rename default data.table value column names
+ 	unn <- grepl("^V\\d+$", colnames(d.slabbed))
+ 	if (any(unn)) {
+ 	  colnames(d.slabbed)[unn] <- paste0("value", ifelse(sum(unn) == 1, "", 1:sum(unn)))
+ 	}
+ 	
  	# convert the slab.label column into top/bottom as integers
  	slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-')
  	d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1]))
diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R
index fc2993e9f..1391f63f6 100644
--- a/tests/testthat/test-slab.R
+++ b/tests/testthat/test-slab.R
@@ -84,6 +84,41 @@ test_that("extended slab functionality: weighted aggregation", {
   expect_true(all(a.1$contributing_fraction == a.2$contributing_fraction))
   expect_true(all(a.2$contributing_fraction == a.3$contributing_fraction))
   
+  # component weighted averages
+  # mukey=461268; Doemill-Jokerst, 3 to 8 percent slopes (615)
+  x <- data.frame(cokey = c(21469960L, 21469960L, 21469960L, 21469960L, 
+                            21469960L, 21469961L, 21469961L, 21469961L), 
+                  comppct_r = c(50L,  50L, 50L, 50L, 50L, 40L, 40L, 40L),
+                  hzdept_r = c(0L, 3L, 13L, 23L, 36L, 0L, 3L, 10L), 
+                  hzdepb_r = c(3L, 13L, 23L, 36L, 61L, 3L, 10L, 35L), 
+                  ph1to1h2o_r = c(6.1, 6.8, 6.7, 6.7, NA, 6.4, 6.5, NA))
+  depths(x) <- cokey ~ hzdept_r + hzdepb_r
+  site(x) <- ~ comppct_r
+  
+  # custom function, with actual comppct_r
+  a.0 <- slab(x, ~ ph1to1h2o_r,
+      slab.structure = c(0, 10),
+      weights = "comppct_r",
+      slab.fun = weighted.mean,
+      na.rm = TRUE
+    )
+  expect_equal(a.0$value, 6.54, tolerance = 0.005) 
+  
+  # should match this within the tolerance:
+  #   soilDB::get_SDA_property(property = 'ph1to1h2o_r', method = "weighted average",
+  #                            mukeys = 461268, miscellaneous_areas = FALSE, include_minors = TRUE,
+  #                            top_depth = 0, bottom_depth = 10)
+                                  
+  # comppct_r adjusted to get higher result more like doemill
+  x$comppct_r <- c(90, 10)
+  a.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r")
+  expect_equal(a.1$p.q50, 6.8)
+  
+  # comppct_r adjusted to get lower result more like jokerst
+  x$comppct_r <- c(10, 90)
+  a.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r")
+  expect_equal(a.2$p.q50, 6.5)
+  
 })
 
 test_that("slab calculations: mean, single profile", {
@@ -93,7 +128,14 @@ test_that("slab calculations: mean, single profile", {
   
   # aggregate single profile
   # custom slabs
-  a <- slab(sp1[1, ], fm = ~ prop, strict=TRUE, slab.structure=c(0,5,10,25,100), slab.fun = mean, na.rm=TRUE)
+  a <- slab(
+      sp1[1,],
+      fm = ~ prop,
+      strict = TRUE,
+      slab.structure = c(0, 5, 10, 25, 100),
+      slab.fun = mean,
+      na.rm = TRUE
+    )
   
   # using mean and similar functions will cause slab to store single result / slab into 'value' column
   expect_true(any(grepl('value', names(a))))
@@ -101,13 +143,13 @@ test_that("slab calculations: mean, single profile", {
   # calculations done by hand (DEB)
   # weighted mean, single profile
   # 0-5: 9.400
-  expect_equal(a$value[1], 9.4, tolerance=0.0001)
+  expect_equal(a$value[1], 9.4, tolerance = 0.0001)
   # 5-10: 7.000
-  expect_equal(a$value[2], 7, tolerance=0.0001)
+  expect_equal(a$value[2], 7, tolerance = 0.0001)
   # 10-25: 8.466
-  expect_equal(a$value[3], 8.4666, tolerance=0.0001)
+  expect_equal(a$value[3], 8.4666, tolerance = 0.0001)
   # 25-100: 15.625
-  expect_equal(a$value[4], 15.625, tolerance=0.0001)
+  expect_equal(a$value[4], 15.625, tolerance = 0.0001)
   
 })
 

From 20666b1f2f7dfe102c8e20f7fc957a73e57ef155 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Thu, 18 Aug 2022 15:17:25 -0700
Subject: [PATCH 6/7] internal slab functions: Allow for (possibly unused)
 additional arguments to avoid errors  - explicitly support `na.rm` where
 appropriate (defaults to true)

---
 R/slab.R | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/R/slab.R b/R/slab.R
index 600984d56..3b7c1e81a 100644
--- a/R/slab.R
+++ b/R/slab.R
@@ -1,7 +1,7 @@
 # default slab function for categorical variables
 # returns a named vector of results
 # this type of function is compatible with aggregate()
-.slab.fun.factor.default <- function(values, cpm) {
+.slab.fun.factor.default <- function(values, cpm, ...) {
 
   if (cpm == 1) {
 	  # probabilities are relative to number of contributing profiles
@@ -25,24 +25,24 @@
 	return(pt)
 }
 
-.slab.fun.factor.weighted <- function(values, w) {
+.slab.fun.factor.weighted <- function(values, w, na.rm = TRUE, ...) {
   # TODO
 }
   
 # default slab function for continuous variables
 # returns a named vector of results
 # this type of function is compatible with aggregate()
-.slab.fun.numeric.default <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
-  .slab.fun.numeric.fast(values, probs = probs)
+.slab.fun.numeric.default <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) {
+  .slab.fun.numeric.fast(values, probs = probs, na.rm = na.rm, ...)
 }
 
 # for a site or horizon level weight column, use Hmisc::wtd.quantile
 #   note that "frequency weights" are assumed to be most common use case, so normwt=FALSE by default, 
 #   normwt=TRUE can be passed as optional argument for slab.fun from slab() interface
-.slab.fun.numeric.weighted <- function(values, w, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), normwt = FALSE) {
+.slab.fun.numeric.weighted <- function(values, w, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), normwt = FALSE, na.rm = TRUE, ...) {
   if (!requireNamespace('Hmisc', quietly = TRUE))
     stop('please install the `Hmisc` package to use `wtd.quantile()` method', call. = FALSE)
-  res <- try(Hmisc::wtd.quantile(values, w, probs = probs, na.rm = TRUE, normwt = normwt), silent = TRUE)
+  res <- try(Hmisc::wtd.quantile(values, w, probs = probs, na.rm = na.rm, normwt = normwt), silent = TRUE)
   if (inherits(res, 'try-error') && grepl("zero non-NA points", res[1])) {
     res <- rep(NA_real_, length(probs))
   } else {
@@ -52,20 +52,20 @@
 }
 
 # easy specification of Hmisc::hdquantile if available
-.slab.fun.numeric.HD <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
+.slab.fun.numeric.HD <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) {
   # sanity check, need this for color distance eval
   if (!requireNamespace('Hmisc', quietly = TRUE))
     stop('please install the `Hmisc` package to use `hdquantile()` method', call. = FALSE)
   
-  res <- Hmisc::hdquantile(values, probs = probs, na.rm = TRUE)
+  res <- Hmisc::hdquantile(values, probs = probs, na.rm = na.rm)
   
   names(res) <- paste('p.q', round(probs * 100), sep = '')
   return(res)
 }
 
 # basic quantile evaluation, better for large data sets
-.slab.fun.numeric.fast <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)) {
-  res <- quantile(values, probs = probs, na.rm = TRUE)
+.slab.fun.numeric.fast <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) {
+  res <- quantile(values, probs = probs, na.rm = na.rm)
   names(res) <- paste('p.q', round(probs * 100), sep = '')
   return(res)
 }
@@ -238,7 +238,7 @@
 	    FUN <- slab.fun
 	  }
 	  wt <- eval(weights)
-	  d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]]))), by = c('variable', g, 'seg.label')])
+	  d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]], ...))), by = c('variable', g, 'seg.label')])
 	  d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1
 
 	} else {

From ea832fcea59feace2bc901d87e72eb4f7642ac49 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Fri, 19 Aug 2022 09:01:06 -0700
Subject: [PATCH 7/7] Add tests with missing weights

---
 tests/testthat/test-slab.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R
index 1391f63f6..8b1584f4f 100644
--- a/tests/testthat/test-slab.R
+++ b/tests/testthat/test-slab.R
@@ -119,6 +119,14 @@ test_that("extended slab functionality: weighted aggregation", {
   a.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r")
   expect_equal(a.2$p.q50, 6.5)
   
+  # comppct_r adjusted to get lower result more like jokerst
+  x$comppct_r <- c(NA, 90)
+  na.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", na.rm = TRUE)
+  expect_equal(na.1$p.q50, 6.5)
+  
+  na.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", slab.fun = weighted.mean, na.rm = TRUE)
+  expect_equal(na.2$value, NA)
+  
 })
 
 test_that("slab calculations: mean, single profile", {