From df7361a9707c18d5a7c63d1d05683d79d0124eb4 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 11:06:36 -0700 Subject: [PATCH 01/23] Add an explicit tests of overlapping horizons --- tests/testthat/test-dice.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/testthat/test-dice.R b/tests/testthat/test-dice.R index bd75185e5..7a733553b 100644 --- a/tests/testthat/test-dice.R +++ b/tests/testthat/test-dice.R @@ -271,7 +271,6 @@ test_that("overlapping horizons", { expect_equal(nrow(x3), 289) }) - test_that("dropped profile IDs", { # corrupt depth @@ -295,5 +294,3 @@ test_that("dropped profile IDs", { ) }) - - From 65bd2104e1b004d6600392359ebfd23e4b5ca27e Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Tue, 23 Aug 2022 14:54:41 -0700 Subject: [PATCH 02/23] Fix dice() link in segment() docs --- R/segment.R | 11 ++++++----- man/segment.Rd | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/segment.R b/R/segment.R index 7b27e0ce0..24aa76227 100644 --- a/R/segment.R +++ b/R/segment.R @@ -3,19 +3,20 @@ #' Segmenting of Soil Horizon Data by Depth Interval #' -#' This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. +#' @description This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. +#' #' @param object either a `SoilProfileCollection` or `data.frame` #' @param intervals a vector of integers over which to slice the horizon data (e.g. `c(25, 100)` or `25:100`) -#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and `trim = FALSE`. +#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}. #' @param hzdepcols a character vector of length 2 specifying the names of the horizon depths (e.g. `c("hzdept", "hzdepb")`), only necessary if `object` is a `data.frame`. #' -#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. `slice()` or `slab()`. 
+#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. #' -#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called `segment_id` identifying the depth interval is added. +#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called \code{segment_id} identifying the depth interval is added. #' #' @author Stephen Roecker #' -#' @seealso [dice()], [glom()] +#' @seealso [dice()] [glom()] #' #' @export #' diff --git a/man/segment.Rd b/man/segment.Rd index bfad07f73..a3833944b 100644 --- a/man/segment.Rd +++ b/man/segment.Rd @@ -11,7 +11,7 @@ segment(object, intervals, trim = TRUE, hzdepcols = NULL) \item{intervals}{a vector of integers over which to slice the horizon data (e.g. \code{c(25, 100)} or \code{25:100})} -\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{trim = FALSE}.} +\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}.} \item{hzdepcols}{a character vector of length 2 specifying the names of the horizon depths (e.g. \code{c("hzdept", "hzdepb")}), only necessary if \code{object} is a \code{data.frame}.} } @@ -22,7 +22,7 @@ Either a \code{SoilProfileCollection} or \code{data.frame} with the original hor This function segments or subdivides horizon data from a \code{SoilProfileCollection} or \code{data.frame} by depth interval (e.g. \code{c(0, 10)}, \code{c(0, 50)}, or \code{25:100}). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. \code{"segment_id"}) are added to each horizon record that correspond with their depth interval (e.g. \code{025-100}). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. } \details{ -\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice()} or \code{slab()}. +\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. 
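For quick orientation while reviewing this patch, here is a minimal usage sketch of the documented `segment()` interface. It assumes the `sp4` sample data used elsewhere in this series, and the depth intervals are purely illustrative.

library(aqp)
data(sp4)
depths(sp4) <- id ~ top + bottom

# tag horizon records with 0-10, 10-25 and 25-100 cm depth intervals,
# truncating horizon depths to the interval limits (trim = TRUE)
seg <- segment(sp4, intervals = c(0, 10, 25, 100), trim = TRUE)

# one record per horizon x interval overlap, labelled via 'segment_id'
table(horizons(seg)$segment_id)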
} \examples{ @@ -111,7 +111,7 @@ head(test3_agg) } \seealso{ -\code{\link[=dice]{dice()}}, \code{\link[=glom]{glom()}} +\code{\link[=dice]{dice()}} \code{\link[=glom]{glom()}} } \author{ Stephen Roecker From 733d891067ecd6be40130cea35498438778c0f01 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 19 Aug 2022 09:09:38 -0700 Subject: [PATCH 03/23] Updates to `slab()` - closes #229 - NA_real_ for proper expectation of missing weights passed to base weighted.mean() (minimal custom slab function) --- R/genSlabLabels.R | 58 +++-- R/slab.R | 448 ++++++++++++++++++------------------- man/slab-methods.Rd | 18 +- tests/testthat/test-slab.R | 113 +++++++++- 4 files changed, 364 insertions(+), 273 deletions(-) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index 5c9be4caf..f5c234ee8 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,54 +1,52 @@ ## TODO: documentation / generalization -# note source data must be normalized via dice() first, e.g. all share the same number of horizons +# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons # generate labels for slabs -genSlabLabels <- function(slab.structure=1, max.d, n.profiles) { +genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) { # fixed-size slabs - if(length(slab.structure) == 1) { + if (length(slab.structure) == 1) { # generate sequence of segment labels - seg.label <- rep(1:ceiling(max.d / slab.structure), each=slab.structure, length=max.d) + seg.label <- rep(1:ceiling(max.d / slab.structure), each = slab.structure, length = max.d) + # general segment labels - seg.label.levels <- tapply(1:max.d, seg.label, function(i) {r <- range(i); paste(c(r[1]-1, r[2]), collapse='-') } ) + seg.label.levels <- tapply(1:max.d, seg.label, + function(i) { + r <- range(i, na.rm = TRUE) + paste(c(r[1] - 1, r[2]), collapse = '-') + }) } # user-defined slabs - if(length(slab.structure) > 1) { - # trival case where segments start from 0 - if(slab.structure[1] == 0 & length(slab.structure) > 2) - seg.label <- rep(slab.structure[-1], times=diff(slab.structure))[1:max.d] + if (length(slab.structure) > 1) { + if (slab.structure[1] == 0 & length(slab.structure) > 2) { + # trivial case where segments start from 0 + + seg.label <- rep(slab.structure[-1], times = diff(slab.structure))[1:max.d] - # other case: user defines an arbitrary lower and upper limit - else { - if(length(slab.structure) != 2) + } else { + # other case: user defines an arbitrary lower and upper limit + if (length(slab.structure) != 2) stop('user-defined slab boundaries must either start from 0, or contain two values between 0 and the max soil depth') - # proceed + # calculate thickness of slab slab.thickness <- diff(slab.structure) - # how many slices of NA before the slab? 
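To make the slab labeling concrete, here is a small base R sketch of the fixed-size case handled above (the same arithmetic as the package code, run on made-up numbers, not the package function itself):

# 25 one-cm slices grouped into 10 cm slabs
slab.structure <- 10
max.d <- 25

seg.label <- rep(1:ceiling(max.d / slab.structure),
                 each = slab.structure, length.out = max.d)

seg.label.levels <- tapply(1:max.d, seg.label, function(i) {
  r <- range(i)
  paste(c(r[1] - 1, r[2]), collapse = '-')
})

# a factor with one element per slice: "0-10", "10-20", "20-25"
factor(seg.label, labels = seg.label.levels)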
- padding.before <- rep(NA, times=slab.structure[1]) - # how many slices of NA afer the slab - padding.after <- rep(NA, times=abs(max.d - slab.structure[2])) + # make a new label for the slab - new.label <- paste(slab.structure, collapse='-') - # generate an index for the slab - slab.idx <- rep(new.label, times=slab.thickness) - # generate the entire index: padding+slab+padding = total number of slices (max_d) - # seg.label <- c(padding.before, slab.idx, padding.after) - seg.label <- slab.idx + new.label <- paste(slab.structure, collapse = '-') + + slab.idx <- rep(new.label, times = slab.thickness) + seg.label <- slab.idx } # generate segment labels - seg.label.levels <- sapply(1:(length(slab.structure)-1), function(i) paste(c(slab.structure[i], slab.structure[i+1]), collapse='-')) + seg.label.levels <- sapply(1:(length(slab.structure) - 1), function(i) { + paste(c(slab.structure[i], slab.structure[i + 1]), collapse = '-') + }) } # covert into a factor that can be used to split profiles into slabs - res <- factor(rep(seg.label, times=n.profiles), labels=seg.label.levels) + res <- factor(rep(seg.label, times = n.profiles), labels = seg.label.levels) return(res) } - - - - - diff --git a/R/slab.R b/R/slab.R index 5fb326d6f..3b7c1e81a 100644 --- a/R/slab.R +++ b/R/slab.R @@ -1,20 +1,16 @@ - - # default slab function for categorical variables # returns a named vector of results # this type of function is compatible with aggregate() -.slab.fun.factor.default <- function(values, cpm) { +.slab.fun.factor.default <- function(values, cpm, ...) { - # probabilities are relative to number of contributing profiles - if(cpm == 1) { - tb <- table(values, useNA='no') + if (cpm == 1) { + # probabilities are relative to number of contributing profiles + tb <- table(values, useNA = 'no') # convert to proportions pt <- prop.table(tb) - } - - # probabilities are relative to total number of profiles - else if(cpm == 2) { - tb <- table(values, useNA='always') + } else if (cpm == 2) { + # probabilities are relative to total number of profiles + tb <- table(values, useNA = 'always') # convert to proportions, # the last column will be named 'NA', and contains the tally of NAs --> remove it pt <- prop.table(tb) @@ -27,62 +23,53 @@ names(pt) <- make.names(levels(values)) return(pt) - } - - -## TODO: make these exported functions with documentation (https://github.com/ncss-tech/aqp/issues/99) - +} +.slab.fun.factor.weighted <- function(values, w, na.rm = TRUE, ...) { + # TODO +} + # default slab function for continuous variables # returns a named vector of results # this type of function is compatible with aggregate() -.slab.fun.numeric.default <- function(values) { - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - ## 2019-10-30: dropping Hmisc as a suggested package - # res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE) - res <- quantile(values, probs=q.probs, na.rm=TRUE) - names(res) <- paste('p.q', round(q.probs * 100), sep='') - return(res) +.slab.fun.numeric.default <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { + .slab.fun.numeric.fast(values, probs = probs, na.rm = na.rm, ...) +} + +# for a site or horizon level weight column, use Hmisc::wtd.quantile +# note that "frequency weights" are assumed to be most common use case, so normwt=FALSE by default, +# normwt=TRUE can be passed as optional argument for slab.fun from slab() interface +.slab.fun.numeric.weighted <- function(values, w, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), normwt = FALSE, na.rm = TRUE, ...) 
{ + if (!requireNamespace('Hmisc', quietly = TRUE)) + stop('please install the `Hmisc` package to use `wtd.quantile()` method', call. = FALSE) + res <- try(Hmisc::wtd.quantile(values, w, probs = probs, na.rm = na.rm, normwt = normwt), silent = TRUE) + if (inherits(res, 'try-error') && grepl("zero non-NA points", res[1])) { + res <- rep(NA_real_, length(probs)) + } else { + names(res) <- paste('p.q', round(probs * 100), sep = '') + } + return(res) } # easy specification of Hmisc::hdquantile if available -.slab.fun.numeric.HD <- function(values) { +.slab.fun.numeric.HD <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { # sanity check, need this for color distance eval - if(!requireNamespace('Hmisc', quietly = TRUE)) - stop('pleast install the `Hmisc` package.', call.=FALSE) - - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - - res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE) - - names(res) <- paste('p.q', round(q.probs * 100), sep='') + if (!requireNamespace('Hmisc', quietly = TRUE)) + stop('please install the `Hmisc` package to use `hdquantile()` method', call. = FALSE) + + res <- Hmisc::hdquantile(values, probs = probs, na.rm = na.rm) + + names(res) <- paste('p.q', round(probs * 100), sep = '') return(res) } -# basic quantile evaluation, better for large datasets -.slab.fun.numeric.fast <- function(values) { - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - res <- quantile(values, probs=q.probs, na.rm=TRUE) - names(res) <- paste('p.q', round(q.probs * 100), sep='') - return(res) +# basic quantile evaluation, better for large data sets +.slab.fun.numeric.fast <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { + res <- quantile(values, probs = probs, na.rm = na.rm) + names(res) <- paste('p.q', round(probs * 100), sep = '') + return(res) } - -# ## TODO: not yet implemented -# # default slab function for weighted, continuous variables -# # returns a named vector of results -# # this type of function is compatible with aggregate() -# # NOTE: nw (normalize-weights) argument will affect the results! -# .slab.fun.wtd.numeric.default <- function(values, w, nw=TRUE) { -# q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) -# res <- wtd.quantile(values, weights=w, probs=q.probs, na.rm=TRUE, normwt=nw) -# names(res) <- paste('p.q', round(q.probs * 100), sep='') -# return(res) -# } - - -## note: slab() uses slice() to resample to 1cm slices, to max(x) or slab.structure[2] if defined - # SoilProfileCollection method # object: SoilProfileCollection # fm: formula defining aggregation @@ -90,20 +77,23 @@ # progress: plyr-progress display # slab.fun: aggregate function applied to data chunks (must return a single row / chunk) # cpm: class probability normalization mode -# weights: character vector naming column containing weights +# weights: character vector naming column in site slot containing weights # ... : extra arguments passed on to slab.fun - -# this is about 40% slower than the old method -.slab <- function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...){ - # issue a message for now that things have changed - # message('usage of slab() has changed considerably, please see the manual page for details') - +.slab <- function(object, + fm, + slab.structure = 1, + strict = FALSE, + slab.fun = slab_function(method = "numeric"), + cpm = 1, + weights = NULL, + ...) 
{ + + # define keywords for data.table + .SD <- NULL; .N <- NULL; value <- NULL + # get extra arguments: length of 0 if no extra arguments extra.args <- list(...) - ## important: change the default behavior of data.frame and melt - opt.original <- options(stringsAsFactors = FALSE) - # get unique list of names in object object.names <- unique(unlist(names(object))) @@ -120,109 +110,78 @@ original.levels <- NULL # extract components of the formula: - g <- all.vars(update(fm, .~0)) # left-hand side - vars <- all.vars(update(fm, 0~.)) # right-hand side - + g <- all.vars(update(fm, . ~ 0)) # left-hand side + vars <- all.vars(update(fm, 0 ~ .)) # right-hand side + # sanity check: - if(! inherits(fm, "formula")) - stop('must provide a valid formula: groups ~ var1 + var2 + ...', call.=FALSE) - + if (!inherits(fm, "formula")) + stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. = FALSE) + # check for bogus left/right side problems with the formula - if(any(g %in% object.names) == FALSE & g != '.') # bogus grouping column - stop('group name either missing from formula, or does match any columns in dataframe', call.=FALSE) - - if(any(vars %in% object.names) == FALSE) # bogus column names in right-hand side - stop('column names in formula do not match column names in dataframe', call.=FALSE) + if (any(g %in% object.names) == FALSE & g != '.') # bogus grouping column + stop('group name either missing from formula, or does match any columns in dataframe', call. = FALSE) - - ## old slice() version ############################### + if (any(vars %in% object.names) == FALSE) # bogus column names in right-hand side + stop('column names in formula do not match column names in dataframe', call. = FALSE) # make formula for slicing - ## TODO: slice() returns an extra row, so only request slices to max-1 - fm.slice <- formula(paste('0:', max(object)-1, ' ~ ', paste(vars, collapse=' + '), sep='')) - - # short-cut for user-defined slab - ## TODO: slice() returns an extra row, so only request slices to max-1 - if(length(slab.structure) == 2 ) - fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2]-1, ' ~ ', paste(vars, collapse=' + '), sep='')) - - # slice into 1cm increments, result is a data.frame - data <- slice(object, fm.slice, strict=strict, just.the.data=TRUE) - - ####################################################### - - - ## dice() version #################################### - # https://github.com/ncss-tech/aqp/issues/115 - - ## TODO: - # * enforce filling / check assumptions about length - # * consider total re-write vs. 
adaptation - - # # make formula for slicing - # fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse=' + '), sep='')) - # - # # short-cut for user-defined slab - # if(length(slab.structure) == 2 ) - # fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse=' + '), sep='')) - # - # # slice into 1cm increments, result is a data.frame - # data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) - - ####################################################### - - + if (length(slab.structure) == 2) { + # user-defined single slab + fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse = ' + '), sep = '')) + } else { + fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse = ' + '), sep = '')) + } + # slice into 1cm increments, result is a data.frame + data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) + + # Note: in this case we need to subtract the extra slice included by slice()/dice() + # do it to sliced result so that the genSlabLabels have the correct length + ldx <- data[[horizonDepths(object)[2]]] > slab.structure[2] + if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { + data <- data[which(!ldx), ] + } # extract site data site.data <- site(object) # check groups for NA, if grouping variable was given - if(g != '.') - if(any(is.na(site.data[, g]))) - stop('grouping variable must not contain NA', call.=FALSE) - - ### !!! this runs out of memory when groups contains NA !!! - ## this is generating a lot of extra objects and possibly wasting memory + if (g != '.' && any(is.na(site.data[, g]))) { + stop('grouping variable must not contain NA', call. = FALSE) + } + # merge site data back into the result, this would include site-level weights - data <- merge(x = data, y = site.data, by=object.ID, all.x=TRUE, sort=FALSE) - - # clean-up - rm(object, site.data) - gc() + data <- merge(x = data, y = site.data, by = object.ID, all.x = TRUE, sort = FALSE) # check variable classes - if(length(vars) > 1) { + if (length(vars) > 1) { vars.numeric.test <- sapply(data[, vars], is.numeric) } else { vars.numeric.test <- is.numeric(data[[vars]]) } - # sanity check: all numeric, or single character/factor - if(any(! vars.numeric.test) & length(vars) > 1) { + if (any(!vars.numeric.test) & length(vars) > 1) { stop('mixed variable types and multiple categorical variables are not currently supported in the same call to slab', call. 
= FALSE) } - # check for single categorical variable, and convert to factor - if(length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) { + if (length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) { # if we have a character, then convert to factor - if(inherits(data[[vars]],'character')) { + if (inherits(data[[vars]],'character')) { message('Note: converting categorical variable to factor.') data[[vars]] <- factor(data[[vars]]) } # check for weights - if(!missing(weights)) { - stop('weighted aggregation of categorical variables not yet implemented', call.=FALSE) + if (!missing(weights)) { + slab.fun <- .slab.fun.factor.weighted + } else { + # re-set default function, currently no user-supply-able option + slab.fun <- .slab.fun.factor.default } - - # re-set default function, currently no user-supply-able option - slab.fun <- .slab.fun.factor.default - # add extra arguments required by this function # note that we cannot accept additional arguments when processing categorical values extra.args <- list(cpm = cpm) @@ -236,127 +195,128 @@ .factorFlag <- FALSE } - - #### - #### optimization note: use of factor labels could be slowing things down... - #### + ## Note: use of factor labels could be slowing things down... ## Note: this assumes ordering is correct in source data / segment labels - ## TODO: make sure that nrow(data) == genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) ## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF: - ## - ## l <- split(data, data$seg.label, drop=FALSE) - ## - ## ... parallel processing with furrr + ## l <- split(data, data$seg.label, drop=FALSE) # add segmenting label to data data$seg.label <- genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) # if there is no left-hand component in the formula, we are aggregating all data in the collection - if(g == '.') { + if (g == '.') { g <- 'all.profiles' # add new grouping variable to horizons data[, g] <- 1 } - - - ## - ## TODO: adding weighted-aggregate functionality here - ## we can't use aggregate() for this - ## we can use data.table methods - + + if (length(weights) > 1) { + stop("`weights` argument should be a character vector of length one containing (site-level) weight column name", call. = FALSE) + } + + if (!is.null(weights) && !weights %in% siteNames(object)) { + stop("column '", weights, "' not present in site data", call. = FALSE) + } + + # convert wide -> long format + d.long <- data.table::melt( + as.data.table(data[which(!is.na(data$seg.label)), ]), + id.vars = unique(c(object.ID, 'seg.label', g, weights)), + measure.vars = vars + ) + + # make a formula for aggregate() + aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) + # check for weights - if(!missing(weights)) - stop('weighted aggregation is not yet supported', call.=FALSE) - - ## TODO: why would this ever happen? 
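For reviewers, a short usage sketch of the weighted path introduced in this patch, patterned after the tests added later in this series; the weight column name and its values are made up, and the default weighted method relies on Hmisc::wtd.quantile.

library(aqp)
data(sp1)
depths(sp1) <- id ~ top + bottom

# hypothetical site-level weight column
sp1$wt <- runif(n = length(sp1), min = 20, max = 100)

# default weighted slab.fun: weighted quantiles on 1 cm slices
a <- slab(sp1, fm = ~ prop, slab.structure = c(0, 25), weights = "wt")

# a custom weighted function may be supplied: values first, weights second
a.wm <- slab(sp1, fm = ~ prop, slab.structure = c(0, 25),
             weights = "wt", slab.fun = weighted.mean, na.rm = TRUE)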
- # throwing out those rows with an NA segment label - seg.label.is.not.NA <- which(!is.na(data$seg.label)) - - # convert into long format - # d.long.df <- reshape::melt(data[seg.label.is.not.NA, ], id.vars=c(object.ID, 'seg.label', g), measure.vars=vars) - - # convert wide -> long format - # using data.table::melt() - # note that this will not preserve factor levels when 'vars' is categorical - # must call unique() on `id.vars` - d.long <- melt( - as.data.table(data[seg.label.is.not.NA, ]), - id.vars = unique(c(object.ID, 'seg.label', g)), - measure.vars = vars, - ) - - # convert back to data.frame - d.long <- as.data.frame(d.long) - - # reset factor levels in d.long[[value]] - if(.factorFlag) { - d.long[['value']] <- factor(d.long[['value']], levels = original.levels) - } - - # make a formula for aggregate() - aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep='')) - - ## - ## TODO: this might be the place to implement parallel code in furrr - ## 1. split into a list based on number of cores/cpus available - ## 2. aggregate using seg.label + variable in parallel - ## 3. combine results (a list of data.frames) - ## - ## NOPE: use data.table which will automatically scale to multiple threads - ## - - + if (!missing(weights)) { + if (missing(slab.fun)) { + # default _weighted_ aggregation fun is .slab.fun.numeric.weighted + FUN <- .slab.fun.numeric.weighted + # custom weighted aggregation fun should take first argument as values + # second argument is "weights" argument + } else { + FUN <- slab.fun + } + wt <- eval(weights) + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]], ...))), by = c('variable', g, 'seg.label')]) + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 - # process chunks according to group -> variable -> segment - # NA values are not explicitly dropped - if(length(extra.args) == 0) - d.slabbed <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun) - - # optionally account for extra arguments - else { - the.args <- c(list(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun), extra.args) - d.slabbed <- do.call(what='aggregate', args=the.args) + } else { + # convert back to data.frame + d.long <- as.data.frame(d.long) + + # reset factor levels in d.long[[value]] + if (.factorFlag) { + d.long[['value']] <- factor(d.long[['value']], levels = original.levels) + } + + # process chunks according to group -> variable -> segment + # NA values are not explicitly dropped + if (length(extra.args) == 0) { + d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun) + } else { + # optionally account for extra arguments + the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args) + d.slabbed <- do.call(what = 'aggregate', args = the.args) + } + + # if slab.fun returns a vector of length > 1 we must: + # convert the complex data.frame returned by aggregate into a regular data.frame + # the column 'value' is a matrix with the results of slab.fun + if (inherits(d.slabbed$value, 'matrix')) { + d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) + } + + # ensure that the names returned from slab.fun are legal + names(d.slabbed) <- make.names(names(d.slabbed)) + + # estimate contributing fraction + d.slabbed$contributing_fraction <- aggregate( + aggregate.fm, + data = d.long, + na.action = na.pass, + FUN = function(i) { + length(na.omit(i)) / length(i) + } + )$value + } - - - # if 
slab.fun returns a vector of length > 1 we must: - # convert the complex data.frame returned by aggregate into a regular data.frame - # the column 'value' is a matrix with the results of slab.fun - if(inherits(d.slabbed$value, 'matrix')) - d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) - - # ensure that the names returned from slab.fun are legal - names(d.slabbed) <- make.names(names(d.slabbed)) - - # convert the slab.label column into top/bottom as integers - slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-') - d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1])) - d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2])) - - # estimate contributing fraction - d.slabbed$contributing_fraction <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=function(i) {length(na.omit(i)) / length(i)})$value - - + + # rename default data.table value column names + unn <- grepl("^V\\d+$", colnames(d.slabbed)) + if (any(unn)) { + colnames(d.slabbed)[unn] <- paste0("value", ifelse(sum(unn) == 1, "", 1:sum(unn))) + } + + # convert the slab.label column into top/bottom as integers + slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-') + d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1])) + d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2])) + # remove seg.label from result d.slabbed$seg.label <- NULL # if categorical data have been aggregated, set an attribute with the original levels # this allows one to recover values that are not legal column names # e.g. 2Bt is corrupted to X2Bt - if(! is.null(original.levels)) + if (!is.null(original.levels)) { attr(d.slabbed, 'original.levels') <- original.levels - - # reset options: - options(opt.original) - - # done + } + return(d.slabbed) } # setup generic function -# if (!isGeneric("slab")) - setGeneric("slab", function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...) standardGeneric("slab")) +setGeneric("slab", function(object, + fm, + slab.structure = 1, + strict = FALSE, + slab.fun = slab_function(method = "numeric"), + cpm = 1, + weights = NULL, + ...) + standardGeneric("slab")) -# method dispatch #' Slab-Wise Aggregation of SoilProfileCollection Objects #' #' Aggregate soil properties along user-defined `slabs`, and optionally within @@ -373,6 +333,8 @@ #' \code{stats::quantile} from the Hmisc package, which requires at least 2 #' observations per chunk. Note that if `group` is a factor it must not contain #' NAs. +#' +#' `slab()` uses `dice()` to "resample" profiles to 1cm slices from depth 0 to `max(x)` (or `slab.structure[2]`, if defined). #' #' Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate. #' This is performed by specifying \code{identity} in \code{slab.fun}. See @@ -416,7 +378,7 @@ #' either: number of profiles with data at the current slice (cpm=1), or by the #' number of profiles in the collection (cpm=2). Mode 1 values will always sum #' to the contributing fraction, while mode 2 values will always sum to 1. -#' @param weights Column name containing weights. 
NOT YET IMPLEMENTED +#' @param weights Column name containing site-level weights #' @param \dots further arguments passed to \code{slab.fun} #' @return Output is returned in long format, such that slice-wise aggregates #' are returned once for each combination of grouping level (optional), @@ -526,7 +488,6 @@ #' scales=list(x=list(alternating=1)) #' ) #' -#' #' ## #' ## categorical variable example #' ## @@ -780,6 +741,7 @@ #' #' #' +# # TODO: update this when weights are implemented #' ## weighted-aggregation -- NOT YET IMPLEMENTED -- #' # load sample data, upgrade to SoilProfileCollection #' data(sp1) @@ -790,4 +752,24 @@ #' sp1$site.wts <- runif(n=length(sp1), min=20, max=100) #' } #' -setMethod(f='slab', signature='SoilProfileCollection', definition=.slab) +setMethod(f = 'slab', + signature = 'SoilProfileCollection', + definition = .slab) + +#' @param method one of `"numeric"`, `"factor"`, `"hd"`, `"weighted.numeric"`, `"weighted.factor"`, `"fast"` +#' @details `slab_function()`: The default `"numeric"` aggregation method is the `"fast"` numeric (quantile) method. Additional methods include `"factor"` for categorical data, `"hd"` to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "`weighted`" to use a weighted quantile method from the Hmisc package +#' @return `slab_function()`: return an aggregation function based on the `method` argument +#' @rdname slab-methods +#' @export +slab_function <- function(method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast")) { + switch(method, + numeric = .slab.fun.numeric.default, # uses "fast" numeric method + factor = .slab.fun.factor.default, + weighted.factor = .slab.fun.factor.weighted, + hd = .slab.fun.numeric.HD, + weighted.numeric = .slab.fun.numeric.weighted, + weighted.factor = .slab.fun.numeric.weighted, + fast = .slab.fun.numeric.fast, + .slab.fun.numeric.default + ) +} diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 305a43998..618c6cde1 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -7,6 +7,7 @@ \alias{slab2} \alias{genSlabLabels} \alias{slab,SoilProfileCollection-method} +\alias{slab_function} \title{Slab-Wise Aggregation of SoilProfileCollection Objects} \usage{ \S4method{slab}{SoilProfileCollection}( @@ -14,11 +15,15 @@ fm, slab.structure = 1, strict = FALSE, - slab.fun = .slab.fun.numeric.default, + slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, ... ) + +slab_function( + method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast") +) } \arguments{ \item{object}{a SoilProfileCollection} @@ -42,9 +47,11 @@ either: number of profiles with data at the current slice (cpm=1), or by the number of profiles in the collection (cpm=2). Mode 1 values will always sum to the contributing fraction, while mode 2 values will always sum to 1.} -\item{weights}{Column name containing weights. 
NOT YET IMPLEMENTED} +\item{weights}{Column name containing site-level weights} \item{\dots}{further arguments passed to \code{slab.fun}} + +\item{method}{one of \code{"numeric"}, \code{"factor"}, \code{"hd"}, \code{"weighted.numeric"}, \code{"weighted.factor"}, \code{"fast"}} } \value{ Output is returned in long format, such that slice-wise aggregates @@ -74,6 +81,8 @@ fraction of profiles contributing to the aggregate value, ranges from 1/n_profiles to 1.} } + +\code{slab_function()}: return an aggregation function based on the \code{method} argument } \description{ Aggregate soil properties along user-defined \code{slabs}, and optionally within @@ -92,6 +101,8 @@ mean, mean-1SD and mean+1SD. The default slab function wraps observations per chunk. Note that if \code{group} is a factor it must not contain NAs. +\code{slab()} uses \code{dice()} to "resample" profiles to 1cm slices from depth 0 to \code{max(x)} (or \code{slab.structure[2]}, if defined). + Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate. This is performed by specifying \code{identity} in \code{slab.fun}. See examples beflow for a demonstration of this functionality. @@ -114,6 +125,8 @@ integers}{e.g. c(50, 60): data are aggregated over depths spanning 50--60 units} \item{a vector of 3 or more integers}{e.g. c(0, 5, 10, 50, 100): data are aggregated over the depths spanning 0--5, 5--10, 10--50, 50--100 units} } + +\code{slab_function()}: The default \code{"numeric"} aggregation method is the \code{"fast"} numeric (quantile) method. Additional methods include \code{"factor"} for categorical data, \code{"hd"} to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "\code{weighted}" to use a weighted quantile method from the Hmisc package } \note{ Arguments to \code{slab} have changed with \code{aqp} 1.5 (2012-12-29) @@ -191,7 +204,6 @@ xyplot(top ~ value | id, data=a, ylab='Depth', scales=list(x=list(alternating=1)) ) - ## ## categorical variable example ## diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index c45537f79..10a4c5739 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -1,7 +1,5 @@ context("slab method for SoilProfileCollection objects") - - test_that("basic slab functionality", { data(sp1, package = 'aqp') @@ -35,8 +33,102 @@ test_that("basic slab functionality", { expect_true(any(grepl('p.q', nm))) }) +test_that("slab structure for a single slab", { + data(sp4, package = 'aqp') + depths(sp4) <- id ~ top + bottom + + sp4$group <- c(rep('A',5), rep('B',5)) + a.1 <- slab( + sp4, + fm = ~ sand + silt + clay, + slab.structure = c(0, 10), + slab.fun = mean, + na.rm = TRUE + ) + expect_equal(nrow(a.1), 3) + + # again, this time within groups defined by a site-level attribute: + a.1 <- slab( + sp4, + fm = group ~ sand + silt + clay, + slab.structure = c(0, 10), + slab.fun = mean, + na.rm = TRUE + ) + expect_equal(nrow(a.1), 6) +}) +test_that("extended slab functionality: weighted aggregation", { + + data(sp1, package = 'aqp') + depths(sp1) <- id ~ top + bottom + + sp1$weights <- rep(1:2, length(sp1))[1:length(sp1)] + sp1$wtgrp <- rep(1, length(sp1)) + + # we expect quantile estimates to vary (given weighted v.s. 
unweighted) + a.0 <- slab(sp1, fm = ~ prop, weights = "weights") + a.1 <- slab(sp1, fm = ~ prop, strict = TRUE, weights = "weights") + a.2 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE, weights = "weights") + a.3 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE) + + # expect consistent structure for weighted/unweighted: same column names except for group + ungroupcols <- colnames(a.3) + ungroupcols[2] <- "all.profiles" + expect_equal(colnames(a.0), ungroupcols) + expect_equal(colnames(a.1), ungroupcols) + expect_equal(colnames(a.2), colnames(a.3)) + + # contributing fractions should be identical + expect_true(all(a.1$contributing_fraction == a.2$contributing_fraction)) + expect_true(all(a.2$contributing_fraction == a.3$contributing_fraction)) + + # component weighted averages + # mukey=461268; Doemill-Jokerst, 3 to 8 percent slopes (615) + x <- data.frame(cokey = c(21469960L, 21469960L, 21469960L, 21469960L, + 21469960L, 21469961L, 21469961L, 21469961L), + comppct_r = c(50L, 50L, 50L, 50L, 50L, 40L, 40L, 40L), + hzdept_r = c(0L, 3L, 13L, 23L, 36L, 0L, 3L, 10L), + hzdepb_r = c(3L, 13L, 23L, 36L, 61L, 3L, 10L, 35L), + ph1to1h2o_r = c(6.1, 6.8, 6.7, 6.7, NA, 6.4, 6.5, NA)) + depths(x) <- cokey ~ hzdept_r + hzdepb_r + site(x) <- ~ comppct_r + + # custom function, with actual comppct_r + a.0 <- slab(x, ~ ph1to1h2o_r, + slab.structure = c(0, 10), + weights = "comppct_r", + slab.fun = weighted.mean, + na.rm = TRUE + ) + expect_equal(a.0$value, 6.54, tolerance = 0.005) + + # should match this within the tolerance: + # soilDB::get_SDA_property(property = 'ph1to1h2o_r', method = "weighted average", + # mukeys = 461268, miscellaneous_areas = FALSE, include_minors = TRUE, + # top_depth = 0, bottom_depth = 10) + + # comppct_r adjusted to get higher result more like doemill + x$comppct_r <- c(90, 10) + a.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r") + expect_equal(a.1$p.q50, 6.8) + + # comppct_r adjusted to get lower result more like jokerst + x$comppct_r <- c(10, 90) + a.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r") + expect_equal(a.2$p.q50, 6.5) + + # comppct_r adjusted to get lower result more like jokerst + x$comppct_r <- c(NA, 90) + na.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", na.rm = TRUE) + expect_equal(na.1$p.q50, 6.5) + + na.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", slab.fun = weighted.mean, na.rm = TRUE) + expect_equal(na.2$value, NA_real_) + +}) + test_that("slab calculations: mean, single profile", { data(sp1, package = 'aqp') @@ -44,7 +136,14 @@ test_that("slab calculations: mean, single profile", { # aggregate single profile # custom slabs - a <- slab(sp1[1, ], fm = ~ prop, strict=TRUE, slab.structure=c(0,5,10,25,100), slab.fun = mean, na.rm=TRUE) + a <- slab( + sp1[1,], + fm = ~ prop, + strict = TRUE, + slab.structure = c(0, 5, 10, 25, 100), + slab.fun = mean, + na.rm = TRUE + ) # using mean and similar functions will cause slab to store single result / slab into 'value' column expect_true(any(grepl('value', names(a)))) @@ -52,13 +151,13 @@ test_that("slab calculations: mean, single profile", { # calculations done by hand (DEB) # weighted mean, single profile # 0-5: 9.400 - expect_equal(a$value[1], 9.4, tolerance=0.0001) + expect_equal(a$value[1], 9.4, tolerance = 0.0001) # 5-10: 7.000 - expect_equal(a$value[2], 7, tolerance=0.0001) + expect_equal(a$value[2], 7, tolerance = 0.0001) # 10-25: 8.466 - expect_equal(a$value[3], 8.4666, tolerance=0.0001) + 
expect_equal(a$value[3], 8.4666, tolerance = 0.0001) # 25-100: 15.625 - expect_equal(a$value[4], 15.625, tolerance=0.0001) + expect_equal(a$value[4], 15.625, tolerance = 0.0001) }) From 11c4e2380b48da6cd964803c388e09348aceb0d6 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 11:29:02 -0700 Subject: [PATCH 04/23] `slab()`: pass thru `byhz` --- R/slab.R | 10 +++++++++- man/slab-methods.Rd | 3 +++ tests/testthat/test-slab.R | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/slab.R b/R/slab.R index 3b7c1e81a..0bd921f8e 100644 --- a/R/slab.R +++ b/R/slab.R @@ -83,6 +83,7 @@ fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -133,7 +134,7 @@ } # slice into 1cm increments, result is a data.frame - data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) + data <- dice(x = object, fm = fm.slice, strict = strict, byhz = byhz, SPC = FALSE, pctMissing = TRUE) # Note: in this case we need to subtract the extra slice included by slice()/dice() # do it to sliced result so that the genSlabLabels have the correct length @@ -242,6 +243,11 @@ d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 } else { + if (missing(slab.fun) || !inherits(slab.fun, 'function')) { + # default numeric aggregation fun is .slab.fun.numeric.default + slab.fun <- .slab.fun.numeric.default + } + # convert back to data.frame d.long <- as.data.frame(d.long) @@ -311,6 +317,7 @@ setGeneric("slab", function(object, fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -371,6 +378,7 @@ setGeneric("slab", function(object, #' or user-defined structure (numeric vector). See details below. #' @param strict logical: should horizons be strictly checked for #' self-consistency? +#' @param byhz logical: should horizons or whole profiles be removed by logic checks in `strict`? Default `TRUE` removes only offending horizons, `FALSE` removes whole profiles with one or more illogical horizons. #' @param slab.fun Function used to process each 'slab' of data, ideally #' returning a vector with names attribute. Defaults to a wrapper function #' around \code{stats::quantile}. See details. diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 618c6cde1..586be840a 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -15,6 +15,7 @@ fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -38,6 +39,8 @@ or user-defined structure (numeric vector). See details below.} \item{strict}{logical: should horizons be strictly checked for self-consistency?} +\item{byhz}{logical: should horizons or whole profiles be removed by logic checks in \code{strict}? Default \code{TRUE} removes only offending horizons, \code{FALSE} removes whole profiles with one or more illogical horizons.} + \item{slab.fun}{Function used to process each 'slab' of data, ideally returning a vector with names attribute. Defaults to a wrapper function around \code{stats::quantile}. 
See details.} diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 10a4c5739..2160390a6 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -214,3 +214,27 @@ test_that("edge case: slab.structure[2] > max(x)", { }) +test_that("overlapping horizons", { + data(sp4, package = 'aqp') + depths(sp4) <- id ~ top + bottom + + # overlapping horizons results in increased number of slices (according to overlapping thickness) + x1 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg) + + # create overlap + sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 + + x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE) + + # evaluate logic by profile, not horizon + + # default case--nothing removed, nothing added + expect_equal(nrow(x1), 331) + + # 12cm of overlap + expect_equal(nrow(x2), 331 + 12) + + # 1 profile removed due to overlap + expect_equal(nrow(x3), 289) +}) + From c127b61f8256b754d727be0e2511edb7fa3d72d6 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 13:30:38 -0700 Subject: [PATCH 05/23] Slab tests --- tests/testthat/test-slab.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 2160390a6..03b3d480b 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -224,17 +224,12 @@ test_that("overlapping horizons", { # create overlap sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 - x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE) - # evaluate logic by profile, not horizon + # TODO: these should not cause errors + # strict=TRUE byhz=FALSE removes overlap via whole profile + expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)) - # default case--nothing removed, nothing added - expect_equal(nrow(x1), 331) - - # 12cm of overlap - expect_equal(nrow(x2), 331 + 12) - - # 1 profile removed due to overlap - expect_equal(nrow(x3), 289) + # strict=TRUE / strict=FALSE (with default byhz) + expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) }) From 5c24a73a25053040364de5d89bf4b2981e6115f1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 19:34:49 -0700 Subject: [PATCH 06/23] Add internal replacement genSlabLabels for testing --- R/genSlabLabels.R | 24 +++++++++++++++++++++--- R/slab.R | 28 ++++++++++++++++++++-------- tests/testthat/test-slab.R | 14 +++++++++----- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index f5c234ee8..2d5ff125d 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,9 +1,27 @@ -## TODO: documentation / generalization -# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons +# does not assume logical horizons (may overlap, resulting in different # of slices per profile) +.genSlabLabels2 <- function(spc, diced, slab.structure = 1) { + if (length(slab.structure) == 1) { + i <- seq(from = 0, length.out = (max(spc) / slab.structure) + 1) * slab.structure + } else if (length(slab.structure) > 1) { + i <- slab.structure + } else { + stop("empty slab.structure", call. 
= FALSE) + } + j <- diff(i) + idx1 <- cumsum(do.call('c', lapply(seq_along(j), function(x) rep(1, j[x])))) + idx2 <- do.call('c', lapply(seq_along(j), function(x) rep(x, j[x]))) + mt <- data.frame(idx1, slab_id = idx2, slab_label = paste0(i[idx2], "-", i[idx2 + 1])) + hzdepb <- horizonDepths(spc)[2] + colnames(mt) <- c(hzdepb, "slab_id", "slab_label") + res <- merge(diced, mt, by = hzdepb, all.x = TRUE, sort = FALSE) + res <- res[order(res[[idname(spc)]], res[[hzdepb]]),] + factor(res$slab_id, labels = na.omit(unique(res$slab_label))) +} +# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons # generate labels for slabs genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) { - + # fixed-size slabs if (length(slab.structure) == 1) { # generate sequence of segment labels diff --git a/R/slab.R b/R/slab.R index 0bd921f8e..7da1d224c 100644 --- a/R/slab.R +++ b/R/slab.R @@ -98,12 +98,9 @@ # get unique list of names in object object.names <- unique(unlist(names(object))) - # get number of profiles - n.profiles <- length(object) - - # max depth - max.d <- max(object) - + # get horizon depth column names + hzd <- horizonDepths(object) + # get name of ID column in original object for later object.ID <- idname(object) @@ -142,7 +139,17 @@ if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { data <- data[which(!ldx), ] } - + + # get number of profiles + n.profiles <- length(unique(data[[idname(object)]])) + + # note this is now a vector with length = n.profiles + data$.thkslb <- data[[hzd[2]]] - data[[hzd[1]]] + + # maximum depth from the diced result (not source SPC object)--accounts for logic "fixes" + max.d <- aggregate(data$.thkslb, by = list(data[[idname(object)]]), sum, na.rm = TRUE)$x + data$.thkslb <- NULL + # extract site data site.data <- site(object) @@ -202,7 +209,7 @@ ## l <- split(data, data$seg.label, drop=FALSE) # add segmenting label to data - data$seg.label <- genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) + data$seg.label <- .genSlabLabels2(object, data, slab.structure = slab.structure) # if there is no left-hand component in the formula, we are aggregating all data in the collection if (g == '.') { @@ -781,3 +788,8 @@ slab_function <- function(method = c("numeric", "factor", "hd", "weighted.numeri .slab.fun.numeric.default ) } + +# for debugging; +.slabinternal <- function(...) { + .slab(...) 
+} diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 03b3d480b..170152419 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -224,12 +224,16 @@ test_that("overlapping horizons", { # create overlap sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 - - # TODO: these should not cause errors # strict=TRUE byhz=FALSE removes overlap via whole profile - expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)) + expect_message({x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)}) + expect_equal(nrow(x2), 245) + + # strict=TRUE / strict=FALSE (with default byhz) -- we get same number of rows with overlap as without + expect_silent(x3 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) + expect_equal(nrow(x3), 245) - # strict=TRUE / strict=FALSE (with default byhz) - expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) + # try with a larger constant slab structure + expect_silent(x3 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, slab.structure = 10, strict = FALSE)) + expect_equal(nrow(x3), 25) }) From 19a8c12d06ac747e46999be1b51e540bbeb3e0c8 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 19:48:21 -0700 Subject: [PATCH 07/23] cleanup --- R/slab.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/R/slab.R b/R/slab.R index 7da1d224c..774a8f63b 100644 --- a/R/slab.R +++ b/R/slab.R @@ -139,17 +139,7 @@ if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { data <- data[which(!ldx), ] } - - # get number of profiles - n.profiles <- length(unique(data[[idname(object)]])) - - # note this is now a vector with length = n.profiles - data$.thkslb <- data[[hzd[2]]] - data[[hzd[1]]] - - # maximum depth from the diced result (not source SPC object)--accounts for logic "fixes" - max.d <- aggregate(data$.thkslb, by = list(data[[idname(object)]]), sum, na.rm = TRUE)$x - data$.thkslb <- NULL - + # extract site data site.data <- site(object) From 6a7325356ca85a0b43589b6a11656ebb08880ae1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 12 Sep 2022 09:40:50 -0700 Subject: [PATCH 08/23] Add `suppressWarnings()` for `data.table::melt()` --- R/slab.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index 774a8f63b..dd80aede1 100644 --- a/R/slab.R +++ b/R/slab.R @@ -216,11 +216,12 @@ } # convert wide -> long format - d.long <- data.table::melt( + # warnings will occur when not all columns are e.g. double + d.long <- suppressWarnings(data.table::melt( as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars - ) + )) # make a formula for aggregate() aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) From 13b53f8edb6648b58994f727e2c0b0c446d2b7b9 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:45:15 -0700 Subject: [PATCH 09/23] Fix errors from misspecified formula --- R/slab.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/slab.R b/R/slab.R index dd80aede1..1a37b9351 100644 --- a/R/slab.R +++ b/R/slab.R @@ -116,11 +116,11 @@ stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. 
= FALSE) # check for bogus left/right side problems with the formula - if (any(g %in% object.names) == FALSE & g != '.') # bogus grouping column - stop('group name either missing from formula, or does match any columns in dataframe', call. = FALSE) + if (length(g) > 0 && !any(g %in% object.names) && g != '.') # bogus grouping column + stop('group name either missing from formula, or does match any columns in data.frame', call. = FALSE) if (any(vars %in% object.names) == FALSE) # bogus column names in right-hand side - stop('column names in formula do not match column names in dataframe', call. = FALSE) + stop('column names in formula do not match column names in data.frame', call. = FALSE) # make formula for slicing if (length(slab.structure) == 2) { From 3bd0fd2caefcb2d68e2620b2c4668e7d8429cbfc Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:45:42 -0700 Subject: [PATCH 10/23] standardize data.frame conversion --- R/slab.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index 1a37b9351..d8a09a7d6 100644 --- a/R/slab.R +++ b/R/slab.R @@ -218,7 +218,7 @@ # convert wide -> long format # warnings will occur when not all columns are e.g. double d.long <- suppressWarnings(data.table::melt( - as.data.table(data[which(!is.na(data$seg.label)), ]), + data.table::as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars )) @@ -307,7 +307,7 @@ attr(d.slabbed, 'original.levels') <- original.levels } - return(d.slabbed) + .as.data.frame.aqp(d.slabbed, aqp_df_class(object)) } # setup generic function From 62979200b09cac8901e84c3cd4257e277de18954 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:46:09 -0700 Subject: [PATCH 11/23] use alternate name to avoid NSE collision with `"wt"` --- R/slab.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/slab.R b/R/slab.R index d8a09a7d6..ebd2e720e 100644 --- a/R/slab.R +++ b/R/slab.R @@ -236,8 +236,8 @@ } else { FUN <- slab.fun } - wt <- eval(weights) - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]], ...))), by = c('variable', g, 'seg.label')]) + .internal_wt <- eval(weights) + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), by = c('variable', g, 'seg.label')]) d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 } else { @@ -246,9 +246,6 @@ slab.fun <- .slab.fun.numeric.default } - # convert back to data.frame - d.long <- as.data.frame(d.long) - # reset factor levels in d.long[[value]] if (.factorFlag) { d.long[['value']] <- factor(d.long[['value']], levels = original.levels) From 076715c6ea1572d5213042dd0289aa9b63dbe4dd Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Wed, 28 Sep 2022 08:22:29 -0700 Subject: [PATCH 12/23] Add comments for .genSlabLabels2 --- R/genSlabLabels.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index 2d5ff125d..3e8918f3a 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,5 +1,15 @@ # does not assume logical horizons (may overlap, resulting in different # of slices per profile) +#' .genSlabLabels2 +#' +#' @param spc A SoilProfileCollection +#' @param diced The `dice()`-ed horizon-level data.frame corresponding to `spc` +#' @param slab.structure Slab structure, either constant thickness, or vector of boundary depths +#' @keywords internal +#' @noRd +#' @return a factor containing slab IDs, labels are the segment top and bottom separated by `"-"` .genSlabLabels2 <- function(spc, diced, slab.structure = 1) { + + # handle various allowed slab.structure specifications if (length(slab.structure) == 1) { i <- seq(from = 0, length.out = (max(spc) / slab.structure) + 1) * slab.structure } else if (length(slab.structure) > 1) { @@ -7,14 +17,22 @@ } else { stop("empty slab.structure", call. = FALSE) } + + # calculate a sequence of cumulative depths and corresponding indices for each slab j <- diff(i) idx1 <- cumsum(do.call('c', lapply(seq_along(j), function(x) rep(1, j[x])))) idx2 <- do.call('c', lapply(seq_along(j), function(x) rep(x, j[x]))) mt <- data.frame(idx1, slab_id = idx2, slab_label = paste0(i[idx2], "-", i[idx2 + 1])) hzdepb <- horizonDepths(spc)[2] colnames(mt) <- c(hzdepb, "slab_id", "slab_label") + + # merge the dice'd data.frame result with the slab ID and labels res <- merge(diced, mt, by = hzdepb, all.x = TRUE, sort = FALSE) + + # order by horizon ID and depth res <- res[order(res[[idname(spc)]], res[[hzdepb]]),] + + # return the slab IDs as a factor, with labels denoting each segment top and bottom factor(res$slab_id, labels = na.omit(unique(res$slab_label))) } From c9315513b0bcaa6f79e0a19fd220a5e2ad505382 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Tue, 23 Aug 2022 15:51:26 -0700 Subject: [PATCH 13/23] Fix segment docs --- R/segment.R | 11 +++++------ man/segment.Rd | 6 +++--- man/singlebracket.Rd | 5 +++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/segment.R b/R/segment.R index 24aa76227..7b27e0ce0 100644 --- a/R/segment.R +++ b/R/segment.R @@ -3,20 +3,19 @@ #' Segmenting of Soil Horizon Data by Depth Interval #' -#' @description This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. -#' +#' This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). 
This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. #' @param object either a `SoilProfileCollection` or `data.frame` #' @param intervals a vector of integers over which to slice the horizon data (e.g. `c(25, 100)` or `25:100`) -#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}. +#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and `trim = FALSE`. #' @param hzdepcols a character vector of length 2 specifying the names of the horizon depths (e.g. `c("hzdept", "hzdepb")`), only necessary if `object` is a `data.frame`. #' -#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. +#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. `slice()` or `slab()`. #' -#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called \code{segment_id} identifying the depth interval is added. +#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called `segment_id` identifying the depth interval is added. #' #' @author Stephen Roecker #' -#' @seealso [dice()] [glom()] +#' @seealso [dice()], [glom()] #' #' @export #' diff --git a/man/segment.Rd b/man/segment.Rd index a3833944b..bfad07f73 100644 --- a/man/segment.Rd +++ b/man/segment.Rd @@ -11,7 +11,7 @@ segment(object, intervals, trim = TRUE, hzdepcols = NULL) \item{intervals}{a vector of integers over which to slice the horizon data (e.g. \code{c(25, 100)} or \code{25:100})} -\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}.} +\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{trim = FALSE}.} \item{hzdepcols}{a character vector of length 2 specifying the names of the horizon depths (e.g. 
\code{c("hzdept", "hzdepb")}), only necessary if \code{object} is a \code{data.frame}.} } @@ -22,7 +22,7 @@ Either a \code{SoilProfileCollection} or \code{data.frame} with the original hor This function segments or subdivides horizon data from a \code{SoilProfileCollection} or \code{data.frame} by depth interval (e.g. \code{c(0, 10)}, \code{c(0, 50)}, or \code{25:100}). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. \code{"segment_id"}) are added to each horizon record that correspond with their depth interval (e.g. \code{025-100}). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. } \details{ -\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. +\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice()} or \code{slab()}. } \examples{ @@ -111,7 +111,7 @@ head(test3_agg) } \seealso{ -\code{\link[=dice]{dice()}} \code{\link[=glom]{glom()}} +\code{\link[=dice]{dice()}}, \code{\link[=glom]{glom()}} } \author{ Stephen Roecker diff --git a/man/singlebracket.Rd b/man/singlebracket.Rd index 136b2dcb4..491dcc0b2 100644 --- a/man/singlebracket.Rd +++ b/man/singlebracket.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SoilProfileCollection-operators.R -\name{[,SoilProfileCollection-method} +\name{[,SoilProfileCollection,ANY,ANY,ANY-method} +\alias{[,SoilProfileCollection,ANY,ANY,ANY-method} \alias{[,SoilProfileCollection-method} \title{Matrix/data.frame-like access to profiles and horizons in a SoilProfileCollection} \usage{ -\S4method{[}{SoilProfileCollection}(x, i, j, ..., drop = TRUE) +\S4method{[}{SoilProfileCollection,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) } \arguments{ \item{x}{a SoilProfileCollection} From 33e9851d4f9eb82cba0569b148ebdb4cd026130b Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Wed, 28 Sep 2022 08:34:15 -0700 Subject: [PATCH 14/23] Add comments for data.table weighted slab aggregation --- R/slab.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index ebd2e720e..0e12d861d 100644 --- a/R/slab.R +++ b/R/slab.R @@ -237,8 +237,15 @@ FUN <- slab.fun } .internal_wt <- eval(weights) - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), by = c('variable', g, 'seg.label')]) - d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 + + # calculate the summary function FUN for each variable*groups*slablabel, including weights + # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), + by = c('variable', g, 'seg.label')]) + + # calculate the contributing fraction for each variable*groups*slablabel + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, + by = c('variable', g, 'seg.label')]$V1 } else { if (missing(slab.fun) || !inherits(slab.fun, 'function')) { From 2da417adb857e60760d5d1951e3a280313310895 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 28 Sep 2022 09:53:26 -0700 Subject: [PATCH 15/23] convert to data.table - need to refactor slab functions so that a single call is possible for weighted/unweighted/numeric/factor --- R/slab.R | 93 ++++++++++++++++++++------------------------------------ 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/R/slab.R b/R/slab.R index 0e12d861d..58e2f10e1 100644 --- a/R/slab.R +++ b/R/slab.R @@ -91,9 +91,6 @@ # define keywords for data.table .SD <- NULL; .N <- NULL; value <- NULL - - # get extra arguments: length of 0 if no extra arguments - extra.args <- list(...) # get unique list of names in object object.names <- unique(unlist(names(object))) @@ -180,10 +177,6 @@ slab.fun <- .slab.fun.factor.default } - # add extra arguments required by this function - # note that we cannot accept additional arguments when processing categorical values - extra.args <- list(cpm = cpm) - # save factor levels for later original.levels <- levels(data[[vars]]) @@ -215,17 +208,13 @@ stop("column '", weights, "' not present in site data", call. = FALSE) } - # convert wide -> long format - # warnings will occur when not all columns are e.g. double + # convert wide -> long format; warnings will occur when not all columns are e.g. 
double d.long <- suppressWarnings(data.table::melt( data.table::as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars )) - # make a formula for aggregate() - aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) - # check for weights if (!missing(weights)) { if (missing(slab.fun)) { @@ -236,60 +225,44 @@ } else { FUN <- slab.fun } - .internal_wt <- eval(weights) - - # calculate the summary function FUN for each variable*groups*slablabel, including weights - # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), - by = c('variable', g, 'seg.label')]) - - # calculate the contributing fraction for each variable*groups*slablabel - d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, - by = c('variable', g, 'seg.label')]$V1 - } else { if (missing(slab.fun) || !inherits(slab.fun, 'function')) { # default numeric aggregation fun is .slab.fun.numeric.default - slab.fun <- .slab.fun.numeric.default - } - - # reset factor levels in d.long[[value]] - if (.factorFlag) { - d.long[['value']] <- factor(d.long[['value']], levels = original.levels) - } - - # process chunks according to group -> variable -> segment - # NA values are not explicitly dropped - if (length(extra.args) == 0) { - d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun) + FUN <- .slab.fun.numeric.default } else { - # optionally account for extra arguments - the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args) - d.slabbed <- do.call(what = 'aggregate', args = the.args) - } - - # if slab.fun returns a vector of length > 1 we must: - # convert the complex data.frame returned by aggregate into a regular data.frame - # the column 'value' is a matrix with the results of slab.fun - if (inherits(d.slabbed$value, 'matrix')) { - d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) + FUN <- slab.fun } - - # ensure that the names returned from slab.fun are legal - names(d.slabbed) <- make.names(names(d.slabbed)) - - # estimate contributing fraction - d.slabbed$contributing_fraction <- aggregate( - aggregate.fm, - data = d.long, - na.action = na.pass, - FUN = function(i) { - length(na.omit(i)) / length(i) - } - )$value - } + .internal_wt <- eval(weights) + + # calculate summary function FUN for each variable*groups*slablabel + # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame + + if (.factorFlag) { + d.long[['value']] <- factor(d.long[['value']], levels = original.levels) + } + + if (!missing(weights)) { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), + by = c('variable', g, 'seg.label')]) + } else { + if (!missing(cpm)) { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(unclass(FUN(value, cpm = cpm, ...)))), + by = c('variable', g, 'seg.label')]) + } else { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, ...))), + by = c('variable', g, 'seg.label')]) + } + } + + # calculate contributing fraction for each variable*groups*slablabel + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, + by = c('variable', g, 'seg.label')]$V1 + + # ensure that the names returned from slab.fun are legal + names(d.slabbed) <- make.names(names(d.slabbed)) + # rename default data.table value 
column names unn <- grepl("^V\\d+$", colnames(d.slabbed)) if (any(unn)) { @@ -311,7 +284,7 @@ attr(d.slabbed, 'original.levels') <- original.levels } - .as.data.frame.aqp(d.slabbed, aqp_df_class(object)) + d.slabbed } # setup generic function From df51525410bf5061ab59ce5ab40b1e7ac4ce8798 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 28 Sep 2022 10:01:48 -0700 Subject: [PATCH 16/23] Fix cpm arg in donttest perturb() example --- R/perturb.R | 2 +- man/perturb.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/perturb.R b/R/perturb.R index bbcd8b612..6a5755924 100644 --- a/R/perturb.R +++ b/R/perturb.R @@ -77,7 +77,7 @@ #' # aggregate horizonation of simulated data #' # note: set class_prob_mode=2 as profiles were not defined to a constant depth #' sim.2$name <- factor(sim.2$name) -#' a <- slab(sim.2, ~ name, class_prob_mode=2) +#' a <- slab(sim.2, ~ name, cpm=2) #' #' # convert to long format for plotting simplicity #' library(data.table) diff --git a/man/perturb.Rd b/man/perturb.Rd index 42ed23a42..df54d75e2 100644 --- a/man/perturb.Rd +++ b/man/perturb.Rd @@ -92,7 +92,7 @@ mtext( # aggregate horizonation of simulated data # note: set class_prob_mode=2 as profiles were not defined to a constant depth sim.2$name <- factor(sim.2$name) -a <- slab(sim.2, ~ name, class_prob_mode=2) +a <- slab(sim.2, ~ name, cpm=2) # convert to long format for plotting simplicity library(data.table) From 48c7cb60fd0596366c28d3588264de9fe813fa85 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 30 Sep 2022 11:30:09 -0700 Subject: [PATCH 17/23] Docs, removing old TODO --- R/slab.R | 21 ++++++++------------- man/slab-methods.Rd | 9 ++++----- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/R/slab.R b/R/slab.R index 58e2f10e1..fb3173f13 100644 --- a/R/slab.R +++ b/R/slab.R @@ -155,7 +155,7 @@ vars.numeric.test <- is.numeric(data[[vars]]) } - # sanity check: all numeric, or single character/factor + # # sanity check: all numeric, or single character/factor if (any(!vars.numeric.test) & length(vars) > 1) { stop('mixed variable types and multiple categorical variables are not currently supported in the same call to slab', call. = FALSE) } @@ -186,11 +186,6 @@ .factorFlag <- FALSE } - ## Note: use of factor labels could be slowing things down... - ## Note: this assumes ordering is correct in source data / segment labels - ## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF: - ## l <- split(data, data$seg.label, drop=FALSE) - # add segmenting label to data data$seg.label <- .genSlabLabels2(object, data, slab.structure = slab.structure) @@ -345,10 +340,10 @@ setGeneric("slab", function(object, #' @aliases slab slab2 genSlabLabels slab,SoilProfileCollection-method #' @docType methods #' @param object a SoilProfileCollection -#' @param fm A formula: either `groups ~ var1 + var2 + var3' where named +#' @param fm A formula: either `groups ~ var1 + var2 + var3` where named #' variables are aggregated within `groups' OR where named variables are -#' aggregated across the entire collection ` ~ var1 + var2 + var3'. If `groups` -#' is a factor it must not contain NA. +#' aggregated across the entire collection ` ~ var1 + var2 + var3`. If `groups` +#' is a factor it must not contain `NA` #' @param slab.structure A user-defined slab thickness (defined by an integer), #' or user-defined structure (numeric vector). See details below. 
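For context, a hedged usage sketch of the weighted aggregation pathway refactored above; the `sp4` sample data, the fabricated `wt` column, and the toy `wtd.mean.fun` summary are illustrative assumptions rather than part of the patch. The weights column is deliberately named `wt`, the case that the earlier internal rename (`.internal_wt`) is meant to support:

library(aqp)

data(sp4)
depths(sp4) <- id ~ top + bottom

# fabricated site-level weights (length = number of profiles)
sp4$wt <- runif(length(sp4), min = 1, max = 10)

# with weights, slab.fun is assumed to receive (values, weights, ...)
wtd.mean.fun <- function(v, w, ...) {
  c(wtd.mean = weighted.mean(v, w, na.rm = TRUE))
}

a.wtd <- slab(sp4, fm = ~ Ca + Mg, slab.structure = 10,
              weights = "wt", slab.fun = wtd.mean.fun)
head(a.wtd)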
#' @param strict logical: should horizons be strictly checked for @@ -356,16 +351,16 @@ setGeneric("slab", function(object, #' @param byhz logical: should horizons or whole profiles be removed by logic checks in `strict`? Default `TRUE` removes only offending horizons, `FALSE` removes whole profiles with one or more illogical horizons. #' @param slab.fun Function used to process each 'slab' of data, ideally #' returning a vector with names attribute. Defaults to a wrapper function -#' around \code{stats::quantile}. See details. +#' around `stats::quantile()`. See details. #' @param cpm Strategy for normalizing slice-wise probabilities, dividing by -#' either: number of profiles with data at the current slice (cpm=1), or by the +#' either: number of profiles with data at the current slice (`cpm=1`), or by the #' number of profiles in the collection (cpm=2). Mode 1 values will always sum #' to the contributing fraction, while mode 2 values will always sum to 1. #' @param weights Column name containing site-level weights -#' @param \dots further arguments passed to \code{slab.fun} +#' @param \dots further arguments passed to `slab.fun` #' @return Output is returned in long format, such that slice-wise aggregates #' are returned once for each combination of grouping level (optional), -#' variable described in the \code{fm} argument, and depth-wise 'slab'. +#' variable described in the `fm` argument, and depth-wise 'slab'. #' #' Aggregation of numeric variables, using the default slab function: #' \describe{ \item{variable}{The names of variables included in the call to diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 586be840a..fb884e2dd 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -29,9 +29,8 @@ slab_function( \arguments{ \item{object}{a SoilProfileCollection} -\item{fm}{A formula: either \verb{groups ~ var1 + var2 + var3' where named variables are aggregated within }groups' OR where named variables are -aggregated across the entire collection \verb{~ var1 + var2 + var3'. If}groups` -is a factor it must not contain NA.} +\item{fm}{A formula: either \code{groups ~ var1 + var2 + var3} where named +variables are aggregated within \verb{groups' OR where named variables are aggregated across the entire collection } ~ var1 + var2 + var3\verb{. If }groups\verb{is a factor it must not contain}NA`} \item{slab.structure}{A user-defined slab thickness (defined by an integer), or user-defined structure (numeric vector). See details below.} @@ -43,10 +42,10 @@ self-consistency?} \item{slab.fun}{Function used to process each 'slab' of data, ideally returning a vector with names attribute. Defaults to a wrapper function -around \code{stats::quantile}. See details.} +around \code{stats::quantile()}. See details.} \item{cpm}{Strategy for normalizing slice-wise probabilities, dividing by -either: number of profiles with data at the current slice (cpm=1), or by the +either: number of profiles with data at the current slice (\code{cpm=1}), or by the number of profiles in the collection (cpm=2). Mode 1 values will always sum to the contributing fraction, while mode 2 values will always sum to 1.} From b3880a3cafe7c848acc8629c95e078e5c8a56b09 Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Fri, 7 Oct 2022 12:54:46 -0700 Subject: [PATCH 18/23] Fix for dice() related to SPCs with horizon designation name set --- R/dice.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/dice.R b/R/dice.R index d6ed1358b..784810f48 100644 --- a/R/dice.R +++ b/R/dice.R @@ -69,7 +69,7 @@ setGeneric("dice", function(x, if (length(fm) > 2) { stop("please provide a valid formula", call. = FALSE) } - ; + # LHS ~ RHS # LHS: "", single integer, vector of integers slices # z-index @@ -182,6 +182,11 @@ setGeneric("dice", function(x, tops <- unlist(tops$V1) bottoms <- tops + 1 + # use internal hzID if hzidname has been set + if (is.null(h.sub[[hzidn]])) { + hzidn <- "hzID" + } + # expand slice IDs (horizon IDs); used to join with horizons sliceIDs <- rep( h.sub[[hzidn]], From 26c06100b2fed8747360162e50a13338c855c03b Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 7 Oct 2022 15:46:07 -0700 Subject: [PATCH 19/23] Update slab-factor-eval.R --- misc/slab/slab-factor-eval.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/misc/slab/slab-factor-eval.R b/misc/slab/slab-factor-eval.R index 220755682..2931f404d 100644 --- a/misc/slab/slab-factor-eval.R +++ b/misc/slab/slab-factor-eval.R @@ -4,7 +4,7 @@ library(aqp) ## pre-cached/subset data # all VALID -x <- readRDS('clarksville-pedons-final.rds') +x <- readRDS('misc/slab/clarksville-pedons-final.rds') # keep track of generalized horizon names for later hz.names <- levels(x$genhz) @@ -20,7 +20,6 @@ s$genhz <- factor(s$genhz, levels = hz.names) # compute slice-wise probability: slice-wise P always sum to 1 a <- slab(x, ~ genhz, cpm = 1) - # saveRDS(a, file = 'slab-factor-1x.rds') From 1efc5478d4c2fb2e2d4cc8e7ecffa73eb816c94e Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 7 Oct 2022 15:50:17 -0700 Subject: [PATCH 20/23] Add slab-factor-2x.rds --- misc/slab/slab-factor-2x.rds | Bin 0 -> 1606 bytes misc/slab/slab-factor-eval.R | 11 +++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 misc/slab/slab-factor-2x.rds diff --git a/misc/slab/slab-factor-2x.rds b/misc/slab/slab-factor-2x.rds new file mode 100644 index 0000000000000000000000000000000000000000..4e50da9a1a2a462678ae3006ac5032d6f340f85d GIT binary patch literal 1606 zcmV-M2D$kkiwFP!000002JM__coRh!fI|-`6y*?T1rZTEP^dxyQM{-^1x1uYi;9?* zwnWkfN&qdWs1;EW6$=O+fKsY}D54@NO0ir;umvoFAU!CDKad|jkN>Fern3QNoo6>q zQ{ZvGr*Ga(x|8f~vh&T%W|Gvz;c$dIBElLt!XsI9M2;9fpj#gvyM#N@_##{d4Qy;| z&L<+me2#E5#tq%6lT+OjS!#a9qmk*UnPao1RU=c}&WZAx4bz+{UQZTFr{H3!y5pq7 z7q)HNwr$(CZKEB5=F2~e#&ub_Y>mN3TO3H7(7w=b-|du^2Jap@IJH-9so#aA!&vHV za6wH0ztnT8icc`O zNUX10bzIHb5h|}2uVdA#MY%vNw^uH3hwwbJ!wx$x{0Jyt1{;R{o2uU^*XOFdAmCQupPp3yFq2u z{v8y@Q@5(!uup8iZq@NF72B&@exGy~i{ozQE#i4~TWw#cK2L#`548hr2kBOwXI33- zdnrWYK7KE1RB&=yhbV&==X8upXr5#0CHd=|ZKuvN_0sI}#8s`P8k{%67p7j8uytf~ z$1#DrzUpT?Oru`&r5%BV<@(5Kf6!|EsH(5!e5A@-a(l&UzEib#td^I@`&Ic@Wx0H? 
zmd6{;m8~AXP+5LI<NqN73fmll!Zxeyz%KKUp>2AY0YXwpu>amd6FmtlA&5aRilv-5#^;D-rd->|ME9qMU2$ zomDxm6G@4t-X`*a?4spbtT6TV-PxYO$Jd+MYQ7BCTb(mv`SV+~f3X z+F^*G$H(M6AMAKwZO0+&%k?!LYxO#}>_Gc#R{Qt0?RN*-?>D=iQZ}uFHUew3=s)T% zpOq53x%(`GD>nB0r)#Gnrml?lg~1gfADX(2F9Djm(w8>-_e`1yy{lJ`c(#C+pv~xR1+AeC#6T=u3T>esTn6poa_9h8Ku5R| zIzeZ+3a*B0;9BSc*TMDB6}mwj+yFO1cjy7}&=Yz=Z@3Biz|C+A^o3jDHn?3o-QZ5> z2MKT&+ztI<03^aaFc1d8y>K54hWlX%JOB^EP#6ZoVFWw`4?_}+gh${}cnn6t<1iYY z04F3v3TW5(Qy~q~VJx`dNf-xi$bd}nz<8Jd+PqqA#;q47!DN^MPr=iW4O3wnJOj_d zb1)sAhZo>Q$bpw&2D}V2;T3olUV~ZiI?RSQ;7xc7=D^#K3v*!}%!hYi0ch7W-i7zz zefR(t!D3hfAHqkl6!Kshn4y)k{SOZ_eSMW8gg>PUTd<);f z_ply*fFEH46u?IK2?}8oY=$kc6}CYUY=@s=2keAjpcqPE7wm>T@GF$UUic06!G8E1 z4!}Y90}eqMltTqn!ePhhbJIRA?eo$;FVCOzvP%A$gG^^es{S(v(c;0V$<8d7GufRg z?l*P1-ErfyJZUaB>()Hhpr5|bUo3leI>CJK_ zPx87l(?_ReIa9nYPp1A1Ue9>_M;DSkUau!Z+VI9%o+)wq#%r}k*mQ2HtE=19tQc19 zIQzF7nv8LJopHQOMym8w%{*DIbXTU+9e38xLo^I + render_diff() From 72b6182543e888f67f34d144d1254375198f3817 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 7 Oct 2022 18:21:00 -0700 Subject: [PATCH 21/23] `dice()`: fillHzGaps recalculates hzID and sets it as `hzidname()` - need to recalculate local var to avoid errors --- R/dice.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/dice.R b/R/dice.R index 726c519de..7ec186614 100644 --- a/R/dice.R +++ b/R/dice.R @@ -97,6 +97,8 @@ setGeneric("dice", function(x, # must fill from min(z) --- [gaps] --- max(z) + 1 x <- fillHzGaps(x, flag = TRUE, to_top = min(z), to_bottom = max(z) + 1) + # NB: fillHzGaps will recalculate/reset hzID, so update the local var! + hzidn <- "hzID" } # check for '.' --> all variables, minus ID/depths @@ -125,7 +127,6 @@ setGeneric("dice", function(x, z <- NULL } - # time to work with horizons h <- horizons(x) @@ -172,7 +173,6 @@ setGeneric("dice", function(x, list(.SD[[htb[1]]][i] + j - 1) }) ), - # iterate over profiles in the collection (by idname) by = c(idn), @@ -182,11 +182,6 @@ setGeneric("dice", function(x, tops <- unlist(tops$V1) bottoms <- tops + 1 - # use internal hzID if hzidname has been set - if (is.null(h.sub[[hzidn]])) { - hzidn <- "hzID" - } - # expand slice IDs (horizon IDs); used to join with horizons sliceIDs <- rep( h.sub[[hzidn]], From 14b4c9a0e585686f757595b3ef24bb158411c9c6 Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Thu, 13 Oct 2022 11:46:29 -0700 Subject: [PATCH 22/23] slab: update cpm=2 comparison --- misc/slab/slab-factor-1x-cpm1.rds | Bin 0 -> 1605 bytes misc/slab/slab-factor-1x-cpm2.rds | Bin 0 -> 1402 bytes misc/slab/slab-factor-eval.R | 9 +++++++++ 3 files changed, 9 insertions(+) create mode 100644 misc/slab/slab-factor-1x-cpm1.rds create mode 100644 misc/slab/slab-factor-1x-cpm2.rds diff --git a/misc/slab/slab-factor-1x-cpm1.rds b/misc/slab/slab-factor-1x-cpm1.rds new file mode 100644 index 0000000000000000000000000000000000000000..9e8c74a7ef75b600a01b8008404e180ecba6e53a GIT binary patch literal 1605 zcmV-L2DYX3RO-~yr@D2MU+E}ikOzP zMA8OI04=Dfh^UB)1q2U3DOEreQ4tlTSgs;i4vQd256a;W><^#Ef7Ex=*#NW7vzvw# zc--&ln|G7$B)i-0d^3}sO{%YHT5T;ls+LwehD9xA*wB8Rd-2*OJZXS0!sSrQ!NK8t zB04JIXssTO^|~jzz2jMGeA=rqY3_{CS<#daDl6Zu+p2^&euH~EnME^&oH?-%U^+46DN9wL+-`fsX!qg@`9XCRQYL- z_`Tu1;&WuH`o+b8U$+dpbb+4ea!r=|*>>9la^APf|FVzhyVYY((5CGWmfH;~tM>1p zIG%=8?S_3~`wgp(cahj$!}8zBaG^NvR^BY$*Rb98h3fw))bf#bsO=!bs`JdQ!)-4` zXxu00g>~{OrnIeV@}g<&>LxUvX6ePbYhA4-&$aZDtdhi)%_f_iGb|96UYf9Fc!PGM zLUm2m&rVoIjpj=yLJQmVk=_2F-TF~gU(5MOmAB;firsvtYVX)BFOT=D@~_Hr`EV_d zH=HZmJ${k0{P&dqkHU7}o&FJV{Y=i!={R6;c23aiMShf>C-R|{^F@A?y;0;tD{m5c zFjTAdFONqStL+W_JIv-M=(Y3pSa7y{uItYTgp)5L7|Ql=b;k{BDi5k`w|=xcE^U?X zLd|<}e^u45Rax#QtHv8-tNPh?%SYPsxPX;a`(rhZpmMm|W3_!dMg1>(SGJxg=U942 z`83aoq(n<^75PAR!7@FTTYB5>EZ@N6>nv?IUxw=~uIVlL30k#()pen$2P5nn3@2YY zVT!QF$K*U8?s#Bz$02LVH8mb<_j_*Hq4wA8_V26P?+&%!Z}ojj@szgu2&~zn|7f^m zW=hLVU1pkGx}p0&o!SqwbXm7Rm|QCIp`}{{5}>8a0%@~X(1!!*e}u{9RSkpcTl9Mk zA4v_wS@SVbpG4XfHDkh^ciZ+pgFXFIA{r%LMvzumq8o29NNMa&2-57zPi)!;l2S;SqQg9)l6^IE;iRzy-;W0{V9rZb*eR7!4kH62^cR z(jfzUFc!vvK2J!WLF9)CFcBufQ}8ro!DN^M&%m?r9887h;RSdRrol@v9bSeR@Cv*N zufa@s9cIBB@Fu(kv*B&XhB+`7=D|BKAM`Prci}yFA3lJEum~2zhwu?BfgD&0x$rT3 z0?S}Itbk8pC42^-!z%a!R>PO@6?_eA;2T&A-@wd9VR~f_&Hrn_x3+ zfvr#g+u&!|4m;o%D1@D`3wFaE_!Wv^FZ>4kU_bm02jC$50f(R%N}v?V;IMY;+;q-M z=e%^z%k$^F)X&MPc_n}8t+lv9OIK)taO~Kz*!V2zsZ)b-&a|6JzR+x2e5RT z|Gp02%Eh8zf3|%{)a!=B9WR$1?)Xx;*QK51>(u1i9AvoC-NtPW8i*I`PIP5@T*+Ry zc;3+E^~R6Q^rd>dtXuV3tv<#=U$Ia-!QbW7Vprn>lDa1Ny9J+gOYnC$R`UDC8uwL5 z_WAw3bn&ctiZ8>T=}Dg8_hh7vOwDwq_&vT1X~XMf`X_A!0+^)9BN7G&-=Kns)MELc~AuJVpQj D4+J|^ literal 0 HcmV?d00001 diff --git a/misc/slab/slab-factor-1x-cpm2.rds b/misc/slab/slab-factor-1x-cpm2.rds new file mode 100644 index 0000000000000000000000000000000000000000..2fbc0f231638c1a7f6009e98d9e24e226d202877 GIT binary patch literal 1402 zcmV-=1%>(_iwFP!000002JPHwcoRh&$MNkcP!vHC6zhoxDpgvFC|<1sUU(I>c-6F| zC6Xp>2(6+h-gx0vMDf0;2Ofxu7b;pst7s8y1;HLrQC}!8d`j9ru8JL$)QX& zdXwp1ca_^aovFRIyEd)N?VCDNnl-J&>zXdlnO^ED3Hk#}ZN*+_KYfc4YC3i5)TvXa z&d4;Zdii(Wth9dyHQ45#_0xteTDT-4H}_vZzB02eoT0zf10r(u6 zDk7J6E)dS84MpY}L}aa$^W&3-Zo28Fn{K;%3)M@fGb`)cDpA+0Y*l1?O=y8|ZWeXP zE`OifHfGLWLnHE!P}waAr)v34Vm+Gi2qyJB<@E})wsAN^zXspY+GO!y4r}8$JtK%19IKBtN(KTWJpv zKHjgYzp9j%k2U4v4Ld8HK7a8_`Sq0FN3o{U{za_)hMtz)JuZ}><3&4r{~3p->t!69 zvftR<_hF*ypemj2ADteTwySs1);)Q=+9cN7Y0Bed)$s;7RpV@@%f~zA;{tZ6dOUW= z5mXuL`q-_X{P{+->GboE?vC#heLSG2UgZv z{kojdj@Rt^HFo_QyNq_+Z})kMs!ltd&uPm2F}u$Zx_kVhyZysxGuGF^E_d|!H&Nqf zbzQd3A~TzL_KJBrN(xe8o_CgxJ5Scg#2jdVNioAuxi}B!V*)O~g}4Y8;}R5LA|~NdT!unS z#^tyISK=yMjUrqF7m87WDR83{Wta*NuEjKXQ4Sycs6Zvm^MuSZh=Q1bDpcb-T#uQ! 
z0XJe6Zog0<01x6JJd8(BgL!xq^YIuS#{xWo zC-D@X#xr;p&*6EzfEV!+7NQm};}yJ$*RTk$;|;utx3Cy*V+r2DyLb;v@jgDlhxiB| zV;Mfdr}zxZ@j1Rg9lpd@Sb?wc4eIeNzQamg+M8`X0UN`)_i}-``iI z>icl5VjN?eng9Jge7kHBS6 zSHR;c_PWL8zAmpft0Lep^>~@L^SY!wYhtXJNXZY5*fyDM-9ce?elRC|AtygL(wZsg zudsesq1Yb``pdz>gNx5$~-=oH>=CfL!?LfNK?0apsAt9X&77o1 Date: Thu, 13 Oct 2022 11:51:51 -0700 Subject: [PATCH 23/23] slab: add cpm=2 comparison --- misc/slab/slab-factor-2x-cpm1.rds | Bin 0 -> 1606 bytes misc/slab/slab-factor-2x-cpm2.rds | Bin 0 -> 1399 bytes misc/slab/slab-factor-eval.R | 25 ++++++++----------------- 3 files changed, 8 insertions(+), 17 deletions(-) create mode 100644 misc/slab/slab-factor-2x-cpm1.rds create mode 100644 misc/slab/slab-factor-2x-cpm2.rds diff --git a/misc/slab/slab-factor-2x-cpm1.rds b/misc/slab/slab-factor-2x-cpm1.rds new file mode 100644 index 0000000000000000000000000000000000000000..4e50da9a1a2a462678ae3006ac5032d6f340f85d GIT binary patch literal 1606 zcmV-M2D$kkiwFP!000002JM__coRh!fI|-`6y*?T1rZTEP^dxyQM{-^1x1uYi;9?* zwnWkfN&qdWs1;EW6$=O+fKsY}D54@NO0ir;umvoFAU!CDKad|jkN>Fern3QNoo6>q zQ{ZvGr*Ga(x|8f~vh&T%W|Gvz;c$dIBElLt!XsI9M2;9fpj#gvyM#N@_##{d4Qy;| z&L<+me2#E5#tq%6lT+OjS!#a9qmk*UnPao1RU=c}&WZAx4bz+{UQZTFr{H3!y5pq7 z7q)HNwr$(CZKEB5=F2~e#&ub_Y>mN3TO3H7(7w=b-|du^2Jap@IJH-9so#aA!&vHV za6wH0ztnT8icc`O zNUX10bzIHb5h|}2uVdA#MY%vNw^uH3hwwbJ!wx$x{0Jyt1{;R{o2uU^*XOFdAmCQupPp3yFq2u z{v8y@Q@5(!uup8iZq@NF72B&@exGy~i{ozQE#i4~TWw#cK2L#`548hr2kBOwXI33- zdnrWYK7KE1RB&=yhbV&==X8upXr5#0CHd=|ZKuvN_0sI}#8s`P8k{%67p7j8uytf~ z$1#DrzUpT?Oru`&r5%BV<@(5Kf6!|EsH(5!e5A@-a(l&UzEib#td^I@`&Ic@Wx0H? zmd6{;m8~AXP+5LI<NqN73fmll!Zxeyz%KKUp>2AY0YXwpu>amd6FmtlA&5aRilv-5#^;D-rd->|ME9qMU2$ zomDxm6G@4t-X`*a?4spbtT6TV-PxYO$Jd+MYQ7BCTb(mv`SV+~f3X z+F^*G$H(M6AMAKwZO0+&%k?!LYxO#}>_Gc#R{Qt0?RN*-?>D=iQZ}uFHUew3=s)T% zpOq53x%(`GD>nB0r)#Gnrml?lg~1gfADX(2F9Djm(w8>-_e`1yy{lJ`c(#C+pv~xR1+AeC#6T=u3T>esTn6poa_9h8Ku5R| zIzeZ+3a*B0;9BSc*TMDB6}mwj+yFO1cjy7}&=Yz=Z@3Biz|C+A^o3jDHn?3o-QZ5> z2MKT&+ztI<03^aaFc1d8y>K54hWlX%JOB^EP#6ZoVFWw`4?_}+gh${}cnn6t<1iYY z04F3v3TW5(Qy~q~VJx`dNf-xi$bd}nz<8Jd+PqqA#;q47!DN^MPr=iW4O3wnJOj_d zb1)sAhZo>Q$bpw&2D}V2;T3olUV~ZiI?RSQ;7xc7=D^#K3v*!}%!hYi0ch7W-i7zz zefR(t!D3hfAHqkl6!Kshn4y)k{SOZ_eSMW8gg>PUTd<);f z_ply*fFEH46u?IK2?}8oY=$kc6}CYUY=@s=2keAjpcqPE7wm>T@GF$UUic06!G8E1 z4!}Y90}eqMltTqn!ePhhbJIRA?eo$;FVCOzvP%A$gG^^es{S(v(c;0V$<8d7GufRg z?l*P1-ErfyJZUaB>()Hhpr5|bUo3leI>CJK_ zPx87l(?_ReIa9nYPp1A1Ue9>_M;DSkUau!Z+VI9%o+)wq#%r}k*mQ2HtE=19tQc19 zIQzF7nv8LJopHQOMym8w%{*DIbXTU+9e38xLo^Ic-6F| zC6Xp>2(6+h-gx0vMDf1B0}n*S3l*)RRkR4Uf?yA*s4tWkzNUURoeeN?p4}uQ6q$LR zPk)CXI{<_Cd9Y28VBYADl< zo@A!iUE}snWoqwjuFa@$`zBA9X3Z$`x~9r=W|q6kg8l$gTd)^8PTy>VnogZMb?Vfq zGcpaUUjAJ$BjcX|jkft`!;~Ql=P!=PO?}smth3ci#nU(czrKoFGwkfisF0?>6H;KAr zm%q<$A2sW*!4dgKsO%PnQ?>j?v7T0L5cOKlHdd07ACWDhzhLEB(NB<5ejZkY7O#%T z1~I>ts^`_NAEC;n;(nawBC%YwDfd@)@l4=$rkifM?fNaMdKqtv7{95;jq?6nRTt!X zqN-1;#s95r7WX5kYFykJ`uD(aw$C#Q-GYde$JtK%19IKBtN(KTW5{U9q< z+nLjhx4)F2<377fCTst!n{7*Kz0@tb=6HYPbUf&E|EStu%k@ZAx8(kc(|V`s?>Jpv zKHjgYzp9j%k2mGx4Ld8HK7WZy`Sq0FNAaf9{zbg~hVGWJ9v4c|@uHo*|4cyB^)i7? 
z*>8;XeVD8|s7j~%N2kZ7?dn~$bx$6zHjDLkn({bVb-Y1N)i~Sf@`+CQxPV=%9*^B| z1XaenK6dLTf4&iII{iE(*71F^j|X&@{Qr%=b-mOrqQ(6_mg{`H+g-BuQ?X{Uj>kIP zuge+jc+IX~W7og2%V@{_cAuxH>a^4OoTl6#v-=z&*5enk_79`YcwYy*+|lFTWR0KI zbve54$8Hw;Q=K|>>g+D5`aYoQb9_1F?;)yE^|^kV*dNO7 z59S%&`=USg!vGA#{x|@Ga3HdA5Dvy6I24EBa16!~I1)$UXbizII2OlYC~|N-PQWna zA`ionj}bT#BXJT=#weVEQ*oNPx#0{Hpb%%`ER4n&jK$eF2j}8EoR4w102ksST#QRl zgz=bwOK};BF%g&J3S5b+a5YMB4O}Qi879Gva#Ub4Jh&E9;6){T@S_UVFwd(s&$ta@ z8fs9B>u^1$;|AP_8Mp~IV_!vv^2|mSVSccE>1?ur7zQS^Rjc?F^Z}A;g;CuXlMy$k-ScTR22~B9m z&sc-C_yy~*9>1amt!P6#HejQ%b=%b2rQR;}cG-Ql%MSTx4t%akxAijzS>nnbHLifi zRqAz%%e`G*Z%$RfU+(cTZ`XAx1=hr9F_Bgn9JX~b*Sdq^+`?d9_(EP`aJab8zs&Co z20W$Hf*xN*NqN9k7WDXi)-446Ro0I#l=_1~f2FkIJp=yQ9Bbvxy+-Ox{&aSB4(ggQ zwbR)Cw;Os*as^#E+)brhx~V??fTzOab9r;N`+11WC?9G1HV-sC^f(P;%fG6;gOfZ) F0013m*W&;H literal 0 HcmV?d00001 diff --git a/misc/slab/slab-factor-eval.R b/misc/slab/slab-factor-eval.R index 0bf44c849..8135e3868 100644 --- a/misc/slab/slab-factor-eval.R +++ b/misc/slab/slab-factor-eval.R @@ -6,18 +6,6 @@ library(aqp) # all VALID x <- readRDS('misc/slab/clarksville-pedons-final.rds') -# keep track of generalized horizon names for later -hz.names <- levels(x$genhz) - -# slice out color and horizon name into 1cm intervals: no aggregation -max.depth <- 180 -slice.resolution <- 1 -slice.vect <- seq(from = 0, to = max.depth, by = slice.resolution) -s <- slice(x, slice.vect ~ genhz) - -# convert horizon name to factor -s$genhz <- factor(s$genhz, levels = hz.names) - # compute slice-wise probability: slice-wise P always sum to 1 a <- slab(x, ~ genhz, cpm = 1) @@ -26,13 +14,11 @@ a <- slab(x, ~ genhz, cpm = 1) a.1 <- slab(x, ~ genhz, cpm = 1) a.2 <- slab(x, ~ genhz, cpm = 2) + # saveRDS(a.1, file = 'misc/slab/slab-factor-1x-cpm1.rds') # saveRDS(a.2, file = 'misc/slab/slab-factor-1x-cpm2.rds') -saveRDS(a.1, file = 'misc/slab/slab-factor-2x-cpm1.rds') -saveRDS(a.2, file = 'misc/slab/slab-factor-2x-cpm2.rds') - -# saveRDS(a, file = 'misc/slab/slab-factor-1x-cpm2.rds') -# saveRDS(a, file = 'misc/slab/slab-factor-2x.rds') +# saveRDS(a.1, file = 'misc/slab/slab-factor-2x-cpm1.rds') +# saveRDS(a.2, file = 'misc/slab/slab-factor-2x-cpm2.rds') library(daff) @@ -40,3 +26,8 @@ d1 <- readRDS('misc/slab/slab-factor-1x.rds') d2 <- readRDS('misc/slab/slab-factor-2x.rds') diff_data(d1,d2) |> render_diff() + +d1 <- readRDS('misc/slab/slab-factor-1x-cpm2.rds') +d2 <- readRDS('misc/slab/slab-factor-2x-cpm2.rds') +diff_data(d1,d2) |> + render_diff()