From df7361a9707c18d5a7c63d1d05683d79d0124eb4 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 11:06:36 -0700 Subject: [PATCH 01/23] Add an explicit tests of overlapping horizons --- tests/testthat/test-dice.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/testthat/test-dice.R b/tests/testthat/test-dice.R index bd75185e5..7a733553b 100644 --- a/tests/testthat/test-dice.R +++ b/tests/testthat/test-dice.R @@ -271,7 +271,6 @@ test_that("overlapping horizons", { expect_equal(nrow(x3), 289) }) - test_that("dropped profile IDs", { # corrupt depth @@ -295,5 +294,3 @@ test_that("dropped profile IDs", { ) }) - - From 65bd2104e1b004d6600392359ebfd23e4b5ca27e Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Tue, 23 Aug 2022 14:54:41 -0700 Subject: [PATCH 02/23] Fix dice() link in segment() docs --- R/segment.R | 11 ++++++----- man/segment.Rd | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/segment.R b/R/segment.R index 7b27e0ce0..24aa76227 100644 --- a/R/segment.R +++ b/R/segment.R @@ -3,19 +3,20 @@ #' Segmenting of Soil Horizon Data by Depth Interval #' -#' This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. +#' @description This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. +#' #' @param object either a `SoilProfileCollection` or `data.frame` #' @param intervals a vector of integers over which to slice the horizon data (e.g. `c(25, 100)` or `25:100`) -#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and `trim = FALSE`. +#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}. #' @param hzdepcols a character vector of length 2 specifying the names of the horizon depths (e.g. `c("hzdept", "hzdepb")`), only necessary if `object` is a `data.frame`. #' -#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. `slice()` or `slab()`. 
+#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. #' -#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called `segment_id` identifying the depth interval is added. +#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called \code{segment_id} identifying the depth interval is added. #' #' @author Stephen Roecker #' -#' @seealso [dice()], [glom()] +#' @seealso [dice()] [glom()] #' #' @export #' diff --git a/man/segment.Rd b/man/segment.Rd index bfad07f73..a3833944b 100644 --- a/man/segment.Rd +++ b/man/segment.Rd @@ -11,7 +11,7 @@ segment(object, intervals, trim = TRUE, hzdepcols = NULL) \item{intervals}{a vector of integers over which to slice the horizon data (e.g. \code{c(25, 100)} or \code{25:100})} -\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{trim = FALSE}.} +\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}.} \item{hzdepcols}{a character vector of length 2 specifying the names of the horizon depths (e.g. \code{c("hzdept", "hzdepb")}), only necessary if \code{object} is a \code{data.frame}.} } @@ -22,7 +22,7 @@ Either a \code{SoilProfileCollection} or \code{data.frame} with the original hor This function segments or subdivides horizon data from a \code{SoilProfileCollection} or \code{data.frame} by depth interval (e.g. \code{c(0, 10)}, \code{c(0, 50)}, or \code{25:100}). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. \code{"segment_id"}) are added to each horizon record that correspond with their depth interval (e.g. \code{025-100}). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. } \details{ -\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice()} or \code{slab()}. +\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. 
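For quick orientation while reviewing this patch, here is a minimal usage sketch of the documented `segment()` interface. It assumes the `sp4` sample data used elsewhere in this series, and the depth intervals are purely illustrative.

library(aqp)
data(sp4)
depths(sp4) <- id ~ top + bottom

# tag horizon records with 0-10, 10-25 and 25-100 cm depth intervals,
# truncating horizon depths to the interval limits (trim = TRUE)
seg <- segment(sp4, intervals = c(0, 10, 25, 100), trim = TRUE)

# one record per horizon x interval overlap, labelled via 'segment_id'
table(horizons(seg)$segment_id)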
} \examples{ @@ -111,7 +111,7 @@ head(test3_agg) } \seealso{ -\code{\link[=dice]{dice()}}, \code{\link[=glom]{glom()}} +\code{\link[=dice]{dice()}} \code{\link[=glom]{glom()}} } \author{ Stephen Roecker From 733d891067ecd6be40130cea35498438778c0f01 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 19 Aug 2022 09:09:38 -0700 Subject: [PATCH 03/23] Updates to `slab()` - closes #229 - NA_real_ for proper expectation of missing weights passed to base weighted.mean() (minimal custom slab function) --- R/genSlabLabels.R | 58 +++-- R/slab.R | 448 ++++++++++++++++++------------------- man/slab-methods.Rd | 18 +- tests/testthat/test-slab.R | 113 +++++++++- 4 files changed, 364 insertions(+), 273 deletions(-) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index 5c9be4caf..f5c234ee8 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,54 +1,52 @@ ## TODO: documentation / generalization -# note source data must be normalized via dice() first, e.g. all share the same number of horizons +# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons # generate labels for slabs -genSlabLabels <- function(slab.structure=1, max.d, n.profiles) { +genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) { # fixed-size slabs - if(length(slab.structure) == 1) { + if (length(slab.structure) == 1) { # generate sequence of segment labels - seg.label <- rep(1:ceiling(max.d / slab.structure), each=slab.structure, length=max.d) + seg.label <- rep(1:ceiling(max.d / slab.structure), each = slab.structure, length = max.d) + # general segment labels - seg.label.levels <- tapply(1:max.d, seg.label, function(i) {r <- range(i); paste(c(r[1]-1, r[2]), collapse='-') } ) + seg.label.levels <- tapply(1:max.d, seg.label, + function(i) { + r <- range(i, na.rm = TRUE) + paste(c(r[1] - 1, r[2]), collapse = '-') + }) } # user-defined slabs - if(length(slab.structure) > 1) { - # trival case where segments start from 0 - if(slab.structure[1] == 0 & length(slab.structure) > 2) - seg.label <- rep(slab.structure[-1], times=diff(slab.structure))[1:max.d] + if (length(slab.structure) > 1) { + if (slab.structure[1] == 0 & length(slab.structure) > 2) { + # trivial case where segments start from 0 + + seg.label <- rep(slab.structure[-1], times = diff(slab.structure))[1:max.d] - # other case: user defines an arbitrary lower and upper limit - else { - if(length(slab.structure) != 2) + } else { + # other case: user defines an arbitrary lower and upper limit + if (length(slab.structure) != 2) stop('user-defined slab boundaries must either start from 0, or contain two values between 0 and the max soil depth') - # proceed + # calculate thickness of slab slab.thickness <- diff(slab.structure) - # how many slices of NA before the slab? 
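To make the slab labeling concrete, here is a small base R sketch of the fixed-size case handled above (the same arithmetic as the package code, run on made-up numbers, not the package function itself):

# 25 one-cm slices grouped into 10 cm slabs
slab.structure <- 10
max.d <- 25

seg.label <- rep(1:ceiling(max.d / slab.structure),
                 each = slab.structure, length.out = max.d)

seg.label.levels <- tapply(1:max.d, seg.label, function(i) {
  r <- range(i)
  paste(c(r[1] - 1, r[2]), collapse = '-')
})

# a factor with one element per slice: "0-10", "10-20", "20-25"
factor(seg.label, labels = seg.label.levels)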
- padding.before <- rep(NA, times=slab.structure[1]) - # how many slices of NA afer the slab - padding.after <- rep(NA, times=abs(max.d - slab.structure[2])) + # make a new label for the slab - new.label <- paste(slab.structure, collapse='-') - # generate an index for the slab - slab.idx <- rep(new.label, times=slab.thickness) - # generate the entire index: padding+slab+padding = total number of slices (max_d) - # seg.label <- c(padding.before, slab.idx, padding.after) - seg.label <- slab.idx + new.label <- paste(slab.structure, collapse = '-') + + slab.idx <- rep(new.label, times = slab.thickness) + seg.label <- slab.idx } # generate segment labels - seg.label.levels <- sapply(1:(length(slab.structure)-1), function(i) paste(c(slab.structure[i], slab.structure[i+1]), collapse='-')) + seg.label.levels <- sapply(1:(length(slab.structure) - 1), function(i) { + paste(c(slab.structure[i], slab.structure[i + 1]), collapse = '-') + }) } # covert into a factor that can be used to split profiles into slabs - res <- factor(rep(seg.label, times=n.profiles), labels=seg.label.levels) + res <- factor(rep(seg.label, times = n.profiles), labels = seg.label.levels) return(res) } - - - - - diff --git a/R/slab.R b/R/slab.R index 5fb326d6f..3b7c1e81a 100644 --- a/R/slab.R +++ b/R/slab.R @@ -1,20 +1,16 @@ - - # default slab function for categorical variables # returns a named vector of results # this type of function is compatible with aggregate() -.slab.fun.factor.default <- function(values, cpm) { +.slab.fun.factor.default <- function(values, cpm, ...) { - # probabilities are relative to number of contributing profiles - if(cpm == 1) { - tb <- table(values, useNA='no') + if (cpm == 1) { + # probabilities are relative to number of contributing profiles + tb <- table(values, useNA = 'no') # convert to proportions pt <- prop.table(tb) - } - - # probabilities are relative to total number of profiles - else if(cpm == 2) { - tb <- table(values, useNA='always') + } else if (cpm == 2) { + # probabilities are relative to total number of profiles + tb <- table(values, useNA = 'always') # convert to proportions, # the last column will be named 'NA', and contains the tally of NAs --> remove it pt <- prop.table(tb) @@ -27,62 +23,53 @@ names(pt) <- make.names(levels(values)) return(pt) - } - - -## TODO: make these exported functions with documentation (https://github.com/ncss-tech/aqp/issues/99) - +} +.slab.fun.factor.weighted <- function(values, w, na.rm = TRUE, ...) { + # TODO +} + # default slab function for continuous variables # returns a named vector of results # this type of function is compatible with aggregate() -.slab.fun.numeric.default <- function(values) { - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - ## 2019-10-30: dropping Hmisc as a suggested package - # res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE) - res <- quantile(values, probs=q.probs, na.rm=TRUE) - names(res) <- paste('p.q', round(q.probs * 100), sep='') - return(res) +.slab.fun.numeric.default <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { + .slab.fun.numeric.fast(values, probs = probs, na.rm = na.rm, ...) +} + +# for a site or horizon level weight column, use Hmisc::wtd.quantile +# note that "frequency weights" are assumed to be most common use case, so normwt=FALSE by default, +# normwt=TRUE can be passed as optional argument for slab.fun from slab() interface +.slab.fun.numeric.weighted <- function(values, w, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), normwt = FALSE, na.rm = TRUE, ...) 
{ + if (!requireNamespace('Hmisc', quietly = TRUE)) + stop('please install the `Hmisc` package to use `wtd.quantile()` method', call. = FALSE) + res <- try(Hmisc::wtd.quantile(values, w, probs = probs, na.rm = na.rm, normwt = normwt), silent = TRUE) + if (inherits(res, 'try-error') && grepl("zero non-NA points", res[1])) { + res <- rep(NA_real_, length(probs)) + } else { + names(res) <- paste('p.q', round(probs * 100), sep = '') + } + return(res) } # easy specification of Hmisc::hdquantile if available -.slab.fun.numeric.HD <- function(values) { +.slab.fun.numeric.HD <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { # sanity check, need this for color distance eval - if(!requireNamespace('Hmisc', quietly = TRUE)) - stop('pleast install the `Hmisc` package.', call.=FALSE) - - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - - res <- Hmisc::hdquantile(values, probs=q.probs, na.rm=TRUE) - - names(res) <- paste('p.q', round(q.probs * 100), sep='') + if (!requireNamespace('Hmisc', quietly = TRUE)) + stop('please install the `Hmisc` package to use `hdquantile()` method', call. = FALSE) + + res <- Hmisc::hdquantile(values, probs = probs, na.rm = na.rm) + + names(res) <- paste('p.q', round(probs * 100), sep = '') return(res) } -# basic quantile evaluation, better for large datasets -.slab.fun.numeric.fast <- function(values) { - q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) - res <- quantile(values, probs=q.probs, na.rm=TRUE) - names(res) <- paste('p.q', round(q.probs * 100), sep='') - return(res) +# basic quantile evaluation, better for large data sets +.slab.fun.numeric.fast <- function(values, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE, ...) { + res <- quantile(values, probs = probs, na.rm = na.rm) + names(res) <- paste('p.q', round(probs * 100), sep = '') + return(res) } - -# ## TODO: not yet implemented -# # default slab function for weighted, continuous variables -# # returns a named vector of results -# # this type of function is compatible with aggregate() -# # NOTE: nw (normalize-weights) argument will affect the results! -# .slab.fun.wtd.numeric.default <- function(values, w, nw=TRUE) { -# q.probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) -# res <- wtd.quantile(values, weights=w, probs=q.probs, na.rm=TRUE, normwt=nw) -# names(res) <- paste('p.q', round(q.probs * 100), sep='') -# return(res) -# } - - -## note: slab() uses slice() to resample to 1cm slices, to max(x) or slab.structure[2] if defined - # SoilProfileCollection method # object: SoilProfileCollection # fm: formula defining aggregation @@ -90,20 +77,23 @@ # progress: plyr-progress display # slab.fun: aggregate function applied to data chunks (must return a single row / chunk) # cpm: class probability normalization mode -# weights: character vector naming column containing weights +# weights: character vector naming column in site slot containing weights # ... : extra arguments passed on to slab.fun - -# this is about 40% slower than the old method -.slab <- function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...){ - # issue a message for now that things have changed - # message('usage of slab() has changed considerably, please see the manual page for details') - +.slab <- function(object, + fm, + slab.structure = 1, + strict = FALSE, + slab.fun = slab_function(method = "numeric"), + cpm = 1, + weights = NULL, + ...) 
{ + + # define keywords for data.table + .SD <- NULL; .N <- NULL; value <- NULL + # get extra arguments: length of 0 if no extra arguments extra.args <- list(...) - ## important: change the default behavior of data.frame and melt - opt.original <- options(stringsAsFactors = FALSE) - # get unique list of names in object object.names <- unique(unlist(names(object))) @@ -120,109 +110,78 @@ original.levels <- NULL # extract components of the formula: - g <- all.vars(update(fm, .~0)) # left-hand side - vars <- all.vars(update(fm, 0~.)) # right-hand side - + g <- all.vars(update(fm, . ~ 0)) # left-hand side + vars <- all.vars(update(fm, 0 ~ .)) # right-hand side + # sanity check: - if(! inherits(fm, "formula")) - stop('must provide a valid formula: groups ~ var1 + var2 + ...', call.=FALSE) - + if (!inherits(fm, "formula")) + stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. = FALSE) + # check for bogus left/right side problems with the formula - if(any(g %in% object.names) == FALSE & g != '.') # bogus grouping column - stop('group name either missing from formula, or does match any columns in dataframe', call.=FALSE) - - if(any(vars %in% object.names) == FALSE) # bogus column names in right-hand side - stop('column names in formula do not match column names in dataframe', call.=FALSE) + if (any(g %in% object.names) == FALSE & g != '.') # bogus grouping column + stop('group name either missing from formula, or does match any columns in dataframe', call. = FALSE) - - ## old slice() version ############################### + if (any(vars %in% object.names) == FALSE) # bogus column names in right-hand side + stop('column names in formula do not match column names in dataframe', call. = FALSE) # make formula for slicing - ## TODO: slice() returns an extra row, so only request slices to max-1 - fm.slice <- formula(paste('0:', max(object)-1, ' ~ ', paste(vars, collapse=' + '), sep='')) - - # short-cut for user-defined slab - ## TODO: slice() returns an extra row, so only request slices to max-1 - if(length(slab.structure) == 2 ) - fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2]-1, ' ~ ', paste(vars, collapse=' + '), sep='')) - - # slice into 1cm increments, result is a data.frame - data <- slice(object, fm.slice, strict=strict, just.the.data=TRUE) - - ####################################################### - - - ## dice() version #################################### - # https://github.com/ncss-tech/aqp/issues/115 - - ## TODO: - # * enforce filling / check assumptions about length - # * consider total re-write vs. 
adaptation - - # # make formula for slicing - # fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse=' + '), sep='')) - # - # # short-cut for user-defined slab - # if(length(slab.structure) == 2 ) - # fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse=' + '), sep='')) - # - # # slice into 1cm increments, result is a data.frame - # data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) - - ####################################################### - - + if (length(slab.structure) == 2) { + # user-defined single slab + fm.slice <- formula(paste(slab.structure[1], ':', slab.structure[2], ' ~ ', paste(vars, collapse = ' + '), sep = '')) + } else { + fm.slice <- formula(paste('0:', max(object), ' ~ ', paste(vars, collapse = ' + '), sep = '')) + } + # slice into 1cm increments, result is a data.frame + data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) + + # Note: in this case we need to subtract the extra slice included by slice()/dice() + # do it to sliced result so that the genSlabLabels have the correct length + ldx <- data[[horizonDepths(object)[2]]] > slab.structure[2] + if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { + data <- data[which(!ldx), ] + } # extract site data site.data <- site(object) # check groups for NA, if grouping variable was given - if(g != '.') - if(any(is.na(site.data[, g]))) - stop('grouping variable must not contain NA', call.=FALSE) - - ### !!! this runs out of memory when groups contains NA !!! - ## this is generating a lot of extra objects and possibly wasting memory + if (g != '.' && any(is.na(site.data[, g]))) { + stop('grouping variable must not contain NA', call. = FALSE) + } + # merge site data back into the result, this would include site-level weights - data <- merge(x = data, y = site.data, by=object.ID, all.x=TRUE, sort=FALSE) - - # clean-up - rm(object, site.data) - gc() + data <- merge(x = data, y = site.data, by = object.ID, all.x = TRUE, sort = FALSE) # check variable classes - if(length(vars) > 1) { + if (length(vars) > 1) { vars.numeric.test <- sapply(data[, vars], is.numeric) } else { vars.numeric.test <- is.numeric(data[[vars]]) } - # sanity check: all numeric, or single character/factor - if(any(! vars.numeric.test) & length(vars) > 1) { + if (any(!vars.numeric.test) & length(vars) > 1) { stop('mixed variable types and multiple categorical variables are not currently supported in the same call to slab', call. 
= FALSE) } - # check for single categorical variable, and convert to factor - if(length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) { + if (length(vars) == 1 & inherits(data[, vars], c('character', 'factor'))) { # if we have a character, then convert to factor - if(inherits(data[[vars]],'character')) { + if (inherits(data[[vars]],'character')) { message('Note: converting categorical variable to factor.') data[[vars]] <- factor(data[[vars]]) } # check for weights - if(!missing(weights)) { - stop('weighted aggregation of categorical variables not yet implemented', call.=FALSE) + if (!missing(weights)) { + slab.fun <- .slab.fun.factor.weighted + } else { + # re-set default function, currently no user-supply-able option + slab.fun <- .slab.fun.factor.default } - - # re-set default function, currently no user-supply-able option - slab.fun <- .slab.fun.factor.default - # add extra arguments required by this function # note that we cannot accept additional arguments when processing categorical values extra.args <- list(cpm = cpm) @@ -236,127 +195,128 @@ .factorFlag <- FALSE } - - #### - #### optimization note: use of factor labels could be slowing things down... - #### + ## Note: use of factor labels could be slowing things down... ## Note: this assumes ordering is correct in source data / segment labels - ## TODO: make sure that nrow(data) == genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) ## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF: - ## - ## l <- split(data, data$seg.label, drop=FALSE) - ## - ## ... parallel processing with furrr + ## l <- split(data, data$seg.label, drop=FALSE) # add segmenting label to data data$seg.label <- genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) # if there is no left-hand component in the formula, we are aggregating all data in the collection - if(g == '.') { + if (g == '.') { g <- 'all.profiles' # add new grouping variable to horizons data[, g] <- 1 } - - - ## - ## TODO: adding weighted-aggregate functionality here - ## we can't use aggregate() for this - ## we can use data.table methods - + + if (length(weights) > 1) { + stop("`weights` argument should be a character vector of length one containing (site-level) weight column name", call. = FALSE) + } + + if (!is.null(weights) && !weights %in% siteNames(object)) { + stop("column '", weights, "' not present in site data", call. = FALSE) + } + + # convert wide -> long format + d.long <- data.table::melt( + as.data.table(data[which(!is.na(data$seg.label)), ]), + id.vars = unique(c(object.ID, 'seg.label', g, weights)), + measure.vars = vars + ) + + # make a formula for aggregate() + aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) + # check for weights - if(!missing(weights)) - stop('weighted aggregation is not yet supported', call.=FALSE) - - ## TODO: why would this ever happen? 
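For reviewers, a short usage sketch of the weighted path introduced in this patch, patterned after the tests added later in this series; the weight column name and its values are made up, and the default weighted method relies on Hmisc::wtd.quantile.

library(aqp)
data(sp1)
depths(sp1) <- id ~ top + bottom

# hypothetical site-level weight column
sp1$wt <- runif(n = length(sp1), min = 20, max = 100)

# default weighted slab.fun: weighted quantiles on 1 cm slices
a <- slab(sp1, fm = ~ prop, slab.structure = c(0, 25), weights = "wt")

# a custom weighted function may be supplied: values first, weights second
a.wm <- slab(sp1, fm = ~ prop, slab.structure = c(0, 25),
             weights = "wt", slab.fun = weighted.mean, na.rm = TRUE)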
- # throwing out those rows with an NA segment label - seg.label.is.not.NA <- which(!is.na(data$seg.label)) - - # convert into long format - # d.long.df <- reshape::melt(data[seg.label.is.not.NA, ], id.vars=c(object.ID, 'seg.label', g), measure.vars=vars) - - # convert wide -> long format - # using data.table::melt() - # note that this will not preserve factor levels when 'vars' is categorical - # must call unique() on `id.vars` - d.long <- melt( - as.data.table(data[seg.label.is.not.NA, ]), - id.vars = unique(c(object.ID, 'seg.label', g)), - measure.vars = vars, - ) - - # convert back to data.frame - d.long <- as.data.frame(d.long) - - # reset factor levels in d.long[[value]] - if(.factorFlag) { - d.long[['value']] <- factor(d.long[['value']], levels = original.levels) - } - - # make a formula for aggregate() - aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep='')) - - ## - ## TODO: this might be the place to implement parallel code in furrr - ## 1. split into a list based on number of cores/cpus available - ## 2. aggregate using seg.label + variable in parallel - ## 3. combine results (a list of data.frames) - ## - ## NOPE: use data.table which will automatically scale to multiple threads - ## - - + if (!missing(weights)) { + if (missing(slab.fun)) { + # default _weighted_ aggregation fun is .slab.fun.numeric.weighted + FUN <- .slab.fun.numeric.weighted + # custom weighted aggregation fun should take first argument as values + # second argument is "weights" argument + } else { + FUN <- slab.fun + } + wt <- eval(weights) + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]], ...))), by = c('variable', g, 'seg.label')]) + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 - # process chunks according to group -> variable -> segment - # NA values are not explicitly dropped - if(length(extra.args) == 0) - d.slabbed <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun) - - # optionally account for extra arguments - else { - the.args <- c(list(aggregate.fm, data=d.long, na.action=na.pass, FUN=slab.fun), extra.args) - d.slabbed <- do.call(what='aggregate', args=the.args) + } else { + # convert back to data.frame + d.long <- as.data.frame(d.long) + + # reset factor levels in d.long[[value]] + if (.factorFlag) { + d.long[['value']] <- factor(d.long[['value']], levels = original.levels) + } + + # process chunks according to group -> variable -> segment + # NA values are not explicitly dropped + if (length(extra.args) == 0) { + d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun) + } else { + # optionally account for extra arguments + the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args) + d.slabbed <- do.call(what = 'aggregate', args = the.args) + } + + # if slab.fun returns a vector of length > 1 we must: + # convert the complex data.frame returned by aggregate into a regular data.frame + # the column 'value' is a matrix with the results of slab.fun + if (inherits(d.slabbed$value, 'matrix')) { + d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) + } + + # ensure that the names returned from slab.fun are legal + names(d.slabbed) <- make.names(names(d.slabbed)) + + # estimate contributing fraction + d.slabbed$contributing_fraction <- aggregate( + aggregate.fm, + data = d.long, + na.action = na.pass, + FUN = function(i) { + length(na.omit(i)) / length(i) + } + )$value + } - - - # if 
slab.fun returns a vector of length > 1 we must: - # convert the complex data.frame returned by aggregate into a regular data.frame - # the column 'value' is a matrix with the results of slab.fun - if(inherits(d.slabbed$value, 'matrix')) - d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) - - # ensure that the names returned from slab.fun are legal - names(d.slabbed) <- make.names(names(d.slabbed)) - - # convert the slab.label column into top/bottom as integers - slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-') - d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1])) - d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2])) - - # estimate contributing fraction - d.slabbed$contributing_fraction <- aggregate(aggregate.fm, data=d.long, na.action=na.pass, FUN=function(i) {length(na.omit(i)) / length(i)})$value - - + + # rename default data.table value column names + unn <- grepl("^V\\d+$", colnames(d.slabbed)) + if (any(unn)) { + colnames(d.slabbed)[unn] <- paste0("value", ifelse(sum(unn) == 1, "", 1:sum(unn))) + } + + # convert the slab.label column into top/bottom as integers + slab.depths <- strsplit(as.character(d.slabbed$seg.label), '-') + d.slabbed$top <- as.integer(lapply(slab.depths, function(i) i[1])) + d.slabbed$bottom <- as.integer(lapply(slab.depths, function(i) i[2])) + # remove seg.label from result d.slabbed$seg.label <- NULL # if categorical data have been aggregated, set an attribute with the original levels # this allows one to recover values that are not legal column names # e.g. 2Bt is corrupted to X2Bt - if(! is.null(original.levels)) + if (!is.null(original.levels)) { attr(d.slabbed, 'original.levels') <- original.levels - - # reset options: - options(opt.original) - - # done + } + return(d.slabbed) } # setup generic function -# if (!isGeneric("slab")) - setGeneric("slab", function(object, fm, slab.structure=1, strict=FALSE, slab.fun=.slab.fun.numeric.default, cpm=1, weights=NULL, ...) standardGeneric("slab")) +setGeneric("slab", function(object, + fm, + slab.structure = 1, + strict = FALSE, + slab.fun = slab_function(method = "numeric"), + cpm = 1, + weights = NULL, + ...) + standardGeneric("slab")) -# method dispatch #' Slab-Wise Aggregation of SoilProfileCollection Objects #' #' Aggregate soil properties along user-defined `slabs`, and optionally within @@ -373,6 +333,8 @@ #' \code{stats::quantile} from the Hmisc package, which requires at least 2 #' observations per chunk. Note that if `group` is a factor it must not contain #' NAs. +#' +#' `slab()` uses `dice()` to "resample" profiles to 1cm slices from depth 0 to `max(x)` (or `slab.structure[2]`, if defined). #' #' Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate. #' This is performed by specifying \code{identity} in \code{slab.fun}. See @@ -416,7 +378,7 @@ #' either: number of profiles with data at the current slice (cpm=1), or by the #' number of profiles in the collection (cpm=2). Mode 1 values will always sum #' to the contributing fraction, while mode 2 values will always sum to 1. -#' @param weights Column name containing weights. 
NOT YET IMPLEMENTED +#' @param weights Column name containing site-level weights #' @param \dots further arguments passed to \code{slab.fun} #' @return Output is returned in long format, such that slice-wise aggregates #' are returned once for each combination of grouping level (optional), @@ -526,7 +488,6 @@ #' scales=list(x=list(alternating=1)) #' ) #' -#' #' ## #' ## categorical variable example #' ## @@ -780,6 +741,7 @@ #' #' #' +# # TODO: update this when weights are implemented #' ## weighted-aggregation -- NOT YET IMPLEMENTED -- #' # load sample data, upgrade to SoilProfileCollection #' data(sp1) @@ -790,4 +752,24 @@ #' sp1$site.wts <- runif(n=length(sp1), min=20, max=100) #' } #' -setMethod(f='slab', signature='SoilProfileCollection', definition=.slab) +setMethod(f = 'slab', + signature = 'SoilProfileCollection', + definition = .slab) + +#' @param method one of `"numeric"`, `"factor"`, `"hd"`, `"weighted.numeric"`, `"weighted.factor"`, `"fast"` +#' @details `slab_function()`: The default `"numeric"` aggregation method is the `"fast"` numeric (quantile) method. Additional methods include `"factor"` for categorical data, `"hd"` to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "`weighted`" to use a weighted quantile method from the Hmisc package +#' @return `slab_function()`: return an aggregation function based on the `method` argument +#' @rdname slab-methods +#' @export +slab_function <- function(method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast")) { + switch(method, + numeric = .slab.fun.numeric.default, # uses "fast" numeric method + factor = .slab.fun.factor.default, + weighted.factor = .slab.fun.factor.weighted, + hd = .slab.fun.numeric.HD, + weighted.numeric = .slab.fun.numeric.weighted, + weighted.factor = .slab.fun.numeric.weighted, + fast = .slab.fun.numeric.fast, + .slab.fun.numeric.default + ) +} diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 305a43998..618c6cde1 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -7,6 +7,7 @@ \alias{slab2} \alias{genSlabLabels} \alias{slab,SoilProfileCollection-method} +\alias{slab_function} \title{Slab-Wise Aggregation of SoilProfileCollection Objects} \usage{ \S4method{slab}{SoilProfileCollection}( @@ -14,11 +15,15 @@ fm, slab.structure = 1, strict = FALSE, - slab.fun = .slab.fun.numeric.default, + slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, ... ) + +slab_function( + method = c("numeric", "factor", "hd", "weighted.numeric", "weighted.factor", "fast") +) } \arguments{ \item{object}{a SoilProfileCollection} @@ -42,9 +47,11 @@ either: number of profiles with data at the current slice (cpm=1), or by the number of profiles in the collection (cpm=2). Mode 1 values will always sum to the contributing fraction, while mode 2 values will always sum to 1.} -\item{weights}{Column name containing weights. 
NOT YET IMPLEMENTED} +\item{weights}{Column name containing site-level weights} \item{\dots}{further arguments passed to \code{slab.fun}} + +\item{method}{one of \code{"numeric"}, \code{"factor"}, \code{"hd"}, \code{"weighted.numeric"}, \code{"weighted.factor"}, \code{"fast"}} } \value{ Output is returned in long format, such that slice-wise aggregates @@ -74,6 +81,8 @@ fraction of profiles contributing to the aggregate value, ranges from 1/n_profiles to 1.} } + +\code{slab_function()}: return an aggregation function based on the \code{method} argument } \description{ Aggregate soil properties along user-defined \code{slabs}, and optionally within @@ -92,6 +101,8 @@ mean, mean-1SD and mean+1SD. The default slab function wraps observations per chunk. Note that if \code{group} is a factor it must not contain NAs. +\code{slab()} uses \code{dice()} to "resample" profiles to 1cm slices from depth 0 to \code{max(x)} (or \code{slab.structure[2]}, if defined). + Sometimes \code{slab} is used to conveniently re-arrange data vs. aggregate. This is performed by specifying \code{identity} in \code{slab.fun}. See examples beflow for a demonstration of this functionality. @@ -114,6 +125,8 @@ integers}{e.g. c(50, 60): data are aggregated over depths spanning 50--60 units} \item{a vector of 3 or more integers}{e.g. c(0, 5, 10, 50, 100): data are aggregated over the depths spanning 0--5, 5--10, 10--50, 50--100 units} } + +\code{slab_function()}: The default \code{"numeric"} aggregation method is the \code{"fast"} numeric (quantile) method. Additional methods include \code{"factor"} for categorical data, \code{"hd"} to use the Harrell-Davis Distribution-Free Quantile Estimator from the Hmisc package, and "\code{weighted}" to use a weighted quantile method from the Hmisc package } \note{ Arguments to \code{slab} have changed with \code{aqp} 1.5 (2012-12-29) @@ -191,7 +204,6 @@ xyplot(top ~ value | id, data=a, ylab='Depth', scales=list(x=list(alternating=1)) ) - ## ## categorical variable example ## diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index c45537f79..10a4c5739 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -1,7 +1,5 @@ context("slab method for SoilProfileCollection objects") - - test_that("basic slab functionality", { data(sp1, package = 'aqp') @@ -35,8 +33,102 @@ test_that("basic slab functionality", { expect_true(any(grepl('p.q', nm))) }) +test_that("slab structure for a single slab", { + data(sp4, package = 'aqp') + depths(sp4) <- id ~ top + bottom + + sp4$group <- c(rep('A',5), rep('B',5)) + a.1 <- slab( + sp4, + fm = ~ sand + silt + clay, + slab.structure = c(0, 10), + slab.fun = mean, + na.rm = TRUE + ) + expect_equal(nrow(a.1), 3) + + # again, this time within groups defined by a site-level attribute: + a.1 <- slab( + sp4, + fm = group ~ sand + silt + clay, + slab.structure = c(0, 10), + slab.fun = mean, + na.rm = TRUE + ) + expect_equal(nrow(a.1), 6) +}) +test_that("extended slab functionality: weighted aggregation", { + + data(sp1, package = 'aqp') + depths(sp1) <- id ~ top + bottom + + sp1$weights <- rep(1:2, length(sp1))[1:length(sp1)] + sp1$wtgrp <- rep(1, length(sp1)) + + # we expect quantile estimates to vary (given weighted v.s. 
unweighted) + a.0 <- slab(sp1, fm = ~ prop, weights = "weights") + a.1 <- slab(sp1, fm = ~ prop, strict = TRUE, weights = "weights") + a.2 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE, weights = "weights") + a.3 <- slab(sp1, fm = wtgrp ~ prop, strict = TRUE) + + # expect consistent structure for weighted/unweighted: same column names except for group + ungroupcols <- colnames(a.3) + ungroupcols[2] <- "all.profiles" + expect_equal(colnames(a.0), ungroupcols) + expect_equal(colnames(a.1), ungroupcols) + expect_equal(colnames(a.2), colnames(a.3)) + + # contributing fractions should be identical + expect_true(all(a.1$contributing_fraction == a.2$contributing_fraction)) + expect_true(all(a.2$contributing_fraction == a.3$contributing_fraction)) + + # component weighted averages + # mukey=461268; Doemill-Jokerst, 3 to 8 percent slopes (615) + x <- data.frame(cokey = c(21469960L, 21469960L, 21469960L, 21469960L, + 21469960L, 21469961L, 21469961L, 21469961L), + comppct_r = c(50L, 50L, 50L, 50L, 50L, 40L, 40L, 40L), + hzdept_r = c(0L, 3L, 13L, 23L, 36L, 0L, 3L, 10L), + hzdepb_r = c(3L, 13L, 23L, 36L, 61L, 3L, 10L, 35L), + ph1to1h2o_r = c(6.1, 6.8, 6.7, 6.7, NA, 6.4, 6.5, NA)) + depths(x) <- cokey ~ hzdept_r + hzdepb_r + site(x) <- ~ comppct_r + + # custom function, with actual comppct_r + a.0 <- slab(x, ~ ph1to1h2o_r, + slab.structure = c(0, 10), + weights = "comppct_r", + slab.fun = weighted.mean, + na.rm = TRUE + ) + expect_equal(a.0$value, 6.54, tolerance = 0.005) + + # should match this within the tolerance: + # soilDB::get_SDA_property(property = 'ph1to1h2o_r', method = "weighted average", + # mukeys = 461268, miscellaneous_areas = FALSE, include_minors = TRUE, + # top_depth = 0, bottom_depth = 10) + + # comppct_r adjusted to get higher result more like doemill + x$comppct_r <- c(90, 10) + a.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r") + expect_equal(a.1$p.q50, 6.8) + + # comppct_r adjusted to get lower result more like jokerst + x$comppct_r <- c(10, 90) + a.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r") + expect_equal(a.2$p.q50, 6.5) + + # comppct_r adjusted to get lower result more like jokerst + x$comppct_r <- c(NA, 90) + na.1 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", na.rm = TRUE) + expect_equal(na.1$p.q50, 6.5) + + na.2 <- slab(x, ~ ph1to1h2o_r, slab.structure = c(0, 10), weights = "comppct_r", slab.fun = weighted.mean, na.rm = TRUE) + expect_equal(na.2$value, NA_real_) + +}) + test_that("slab calculations: mean, single profile", { data(sp1, package = 'aqp') @@ -44,7 +136,14 @@ test_that("slab calculations: mean, single profile", { # aggregate single profile # custom slabs - a <- slab(sp1[1, ], fm = ~ prop, strict=TRUE, slab.structure=c(0,5,10,25,100), slab.fun = mean, na.rm=TRUE) + a <- slab( + sp1[1,], + fm = ~ prop, + strict = TRUE, + slab.structure = c(0, 5, 10, 25, 100), + slab.fun = mean, + na.rm = TRUE + ) # using mean and similar functions will cause slab to store single result / slab into 'value' column expect_true(any(grepl('value', names(a)))) @@ -52,13 +151,13 @@ test_that("slab calculations: mean, single profile", { # calculations done by hand (DEB) # weighted mean, single profile # 0-5: 9.400 - expect_equal(a$value[1], 9.4, tolerance=0.0001) + expect_equal(a$value[1], 9.4, tolerance = 0.0001) # 5-10: 7.000 - expect_equal(a$value[2], 7, tolerance=0.0001) + expect_equal(a$value[2], 7, tolerance = 0.0001) # 10-25: 8.466 - expect_equal(a$value[3], 8.4666, tolerance=0.0001) + 
expect_equal(a$value[3], 8.4666, tolerance = 0.0001) # 25-100: 15.625 - expect_equal(a$value[4], 15.625, tolerance=0.0001) + expect_equal(a$value[4], 15.625, tolerance = 0.0001) }) From 11c4e2380b48da6cd964803c388e09348aceb0d6 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 11:29:02 -0700 Subject: [PATCH 04/23] `slab()`: pass thru `byhz` --- R/slab.R | 10 +++++++++- man/slab-methods.Rd | 3 +++ tests/testthat/test-slab.R | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/slab.R b/R/slab.R index 3b7c1e81a..0bd921f8e 100644 --- a/R/slab.R +++ b/R/slab.R @@ -83,6 +83,7 @@ fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -133,7 +134,7 @@ } # slice into 1cm increments, result is a data.frame - data <- dice(x = object, fm = fm.slice, strict = strict, SPC = FALSE, pctMissing = TRUE) + data <- dice(x = object, fm = fm.slice, strict = strict, byhz = byhz, SPC = FALSE, pctMissing = TRUE) # Note: in this case we need to subtract the extra slice included by slice()/dice() # do it to sliced result so that the genSlabLabels have the correct length @@ -242,6 +243,11 @@ d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 } else { + if (missing(slab.fun) || !inherits(slab.fun, 'function')) { + # default numeric aggregation fun is .slab.fun.numeric.default + slab.fun <- .slab.fun.numeric.default + } + # convert back to data.frame d.long <- as.data.frame(d.long) @@ -311,6 +317,7 @@ setGeneric("slab", function(object, fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -371,6 +378,7 @@ setGeneric("slab", function(object, #' or user-defined structure (numeric vector). See details below. #' @param strict logical: should horizons be strictly checked for #' self-consistency? +#' @param byhz logical: should horizons or whole profiles be removed by logic checks in `strict`? Default `TRUE` removes only offending horizons, `FALSE` removes whole profiles with one or more illogical horizons. #' @param slab.fun Function used to process each 'slab' of data, ideally #' returning a vector with names attribute. Defaults to a wrapper function #' around \code{stats::quantile}. See details. diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 618c6cde1..586be840a 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -15,6 +15,7 @@ fm, slab.structure = 1, strict = FALSE, + byhz = TRUE, slab.fun = slab_function(method = "numeric"), cpm = 1, weights = NULL, @@ -38,6 +39,8 @@ or user-defined structure (numeric vector). See details below.} \item{strict}{logical: should horizons be strictly checked for self-consistency?} +\item{byhz}{logical: should horizons or whole profiles be removed by logic checks in \code{strict}? Default \code{TRUE} removes only offending horizons, \code{FALSE} removes whole profiles with one or more illogical horizons.} + \item{slab.fun}{Function used to process each 'slab' of data, ideally returning a vector with names attribute. Defaults to a wrapper function around \code{stats::quantile}. 
See details.} diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 10a4c5739..2160390a6 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -214,3 +214,27 @@ test_that("edge case: slab.structure[2] > max(x)", { }) +test_that("overlapping horizons", { + data(sp4, package = 'aqp') + depths(sp4) <- id ~ top + bottom + + # overlapping horizons results in increased number of slices (according to overlapping thickness) + x1 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg) + + # create overlap + sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 + + x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE) + + # evaluate logic by profile, not horizon + + # default case--nothing removed, nothing added + expect_equal(nrow(x1), 331) + + # 12cm of overlap + expect_equal(nrow(x2), 331 + 12) + + # 1 profile removed due to overlap + expect_equal(nrow(x3), 289) +}) + From c127b61f8256b754d727be0e2511edb7fa3d72d6 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 13:30:38 -0700 Subject: [PATCH 05/23] Slab tests --- tests/testthat/test-slab.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 2160390a6..03b3d480b 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -224,17 +224,12 @@ test_that("overlapping horizons", { # create overlap sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 - x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE) - # evaluate logic by profile, not horizon + # TODO: these should not cause errors + # strict=TRUE byhz=FALSE removes overlap via whole profile + expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)) - # default case--nothing removed, nothing added - expect_equal(nrow(x1), 331) - - # 12cm of overlap - expect_equal(nrow(x2), 331 + 12) - - # 1 profile removed due to overlap - expect_equal(nrow(x3), 289) + # strict=TRUE / strict=FALSE (with default byhz) + expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) }) From 5c24a73a25053040364de5d89bf4b2981e6115f1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 19:34:49 -0700 Subject: [PATCH 06/23] Add internal replacement genSlabLabels for testing --- R/genSlabLabels.R | 24 +++++++++++++++++++++--- R/slab.R | 28 ++++++++++++++++++++-------- tests/testthat/test-slab.R | 14 +++++++++----- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index f5c234ee8..2d5ff125d 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,9 +1,27 @@ -## TODO: documentation / generalization -# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons +# does not assume logical horizons (may overlap, resulting in different # of slices per profile) +.genSlabLabels2 <- function(spc, diced, slab.structure = 1) { + if (length(slab.structure) == 1) { + i <- seq(from = 0, length.out = (max(spc) / slab.structure) + 1) * slab.structure + } else if (length(slab.structure) > 1) { + i <- slab.structure + } else { + stop("empty slab.structure", call. 
= FALSE) + } + j <- diff(i) + idx1 <- cumsum(do.call('c', lapply(seq_along(j), function(x) rep(1, j[x])))) + idx2 <- do.call('c', lapply(seq_along(j), function(x) rep(x, j[x]))) + mt <- data.frame(idx1, slab_id = idx2, slab_label = paste0(i[idx2], "-", i[idx2 + 1])) + hzdepb <- horizonDepths(spc)[2] + colnames(mt) <- c(hzdepb, "slab_id", "slab_label") + res <- merge(diced, mt, by = hzdepb, all.x = TRUE, sort = FALSE) + res <- res[order(res[[idname(spc)]], res[[hzdepb]]),] + factor(res$slab_id, labels = na.omit(unique(res$slab_label))) +} +# note source data must be "normalized" via dice() first; assumes each profile has the same number of horizons # generate labels for slabs genSlabLabels <- function(slab.structure = 1, max.d, n.profiles) { - + # fixed-size slabs if (length(slab.structure) == 1) { # generate sequence of segment labels diff --git a/R/slab.R b/R/slab.R index 0bd921f8e..7da1d224c 100644 --- a/R/slab.R +++ b/R/slab.R @@ -98,12 +98,9 @@ # get unique list of names in object object.names <- unique(unlist(names(object))) - # get number of profiles - n.profiles <- length(object) - - # max depth - max.d <- max(object) - + # get horizon depth column names + hzd <- horizonDepths(object) + # get name of ID column in original object for later object.ID <- idname(object) @@ -142,7 +139,17 @@ if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { data <- data[which(!ldx), ] } - + + # get number of profiles + n.profiles <- length(unique(data[[idname(object)]])) + + # note this is now a vector with length = n.profiles + data$.thkslb <- data[[hzd[2]]] - data[[hzd[1]]] + + # maximum depth from the diced result (not source SPC object)--accounts for logic "fixes" + max.d <- aggregate(data$.thkslb, by = list(data[[idname(object)]]), sum, na.rm = TRUE)$x + data$.thkslb <- NULL + # extract site data site.data <- site(object) @@ -202,7 +209,7 @@ ## l <- split(data, data$seg.label, drop=FALSE) # add segmenting label to data - data$seg.label <- genSlabLabels(slab.structure = slab.structure, max.d = max.d, n.profiles = n.profiles) + data$seg.label <- .genSlabLabels2(object, data, slab.structure = slab.structure) # if there is no left-hand component in the formula, we are aggregating all data in the collection if (g == '.') { @@ -781,3 +788,8 @@ slab_function <- function(method = c("numeric", "factor", "hd", "weighted.numeri .slab.fun.numeric.default ) } + +# for debugging; +.slabinternal <- function(...) { + .slab(...) 
+} diff --git a/tests/testthat/test-slab.R b/tests/testthat/test-slab.R index 03b3d480b..170152419 100644 --- a/tests/testthat/test-slab.R +++ b/tests/testthat/test-slab.R @@ -224,12 +224,16 @@ test_that("overlapping horizons", { # create overlap sp4@horizons[2,]$bottom <- sp4@horizons[2,]$bottom + 12 - - # TODO: these should not cause errors # strict=TRUE byhz=FALSE removes overlap via whole profile - expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)) + expect_message({x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = TRUE, byhz = FALSE)}) + expect_equal(nrow(x2), 245) + + # strict=TRUE / strict=FALSE (with default byhz) -- we get same number of rows with overlap as without + expect_silent(x3 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) + expect_equal(nrow(x3), 245) - # strict=TRUE / strict=FALSE (with default byhz) - expect_error(x2 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, strict = FALSE)) + # try with a larger constant slab structure + expect_silent(x3 <- slab(sp4, ~ K + Mg + Ca + CEC_7 + ex_Ca_to_Mg, slab.structure = 10, strict = FALSE)) + expect_equal(nrow(x3), 25) }) From 19a8c12d06ac747e46999be1b51e540bbeb3e0c8 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 22 Aug 2022 19:48:21 -0700 Subject: [PATCH 07/23] cleanup --- R/slab.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/R/slab.R b/R/slab.R index 7da1d224c..774a8f63b 100644 --- a/R/slab.R +++ b/R/slab.R @@ -139,17 +139,7 @@ if (length(slab.structure) == 2 && any(ldx, na.rm = TRUE)) { data <- data[which(!ldx), ] } - - # get number of profiles - n.profiles <- length(unique(data[[idname(object)]])) - - # note this is now a vector with length = n.profiles - data$.thkslb <- data[[hzd[2]]] - data[[hzd[1]]] - - # maximum depth from the diced result (not source SPC object)--accounts for logic "fixes" - max.d <- aggregate(data$.thkslb, by = list(data[[idname(object)]]), sum, na.rm = TRUE)$x - data$.thkslb <- NULL - + # extract site data site.data <- site(object) From 6a7325356ca85a0b43589b6a11656ebb08880ae1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 12 Sep 2022 09:40:50 -0700 Subject: [PATCH 08/23] Add `suppressWarnings()` for `data.table::melt()` --- R/slab.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index 774a8f63b..dd80aede1 100644 --- a/R/slab.R +++ b/R/slab.R @@ -216,11 +216,12 @@ } # convert wide -> long format - d.long <- data.table::melt( + # warnings will occur when not all columns are e.g. double + d.long <- suppressWarnings(data.table::melt( as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars - ) + )) # make a formula for aggregate() aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) From 13b53f8edb6648b58994f727e2c0b0c446d2b7b9 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:45:15 -0700 Subject: [PATCH 09/23] Fix errors from misspecified formula --- R/slab.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/slab.R b/R/slab.R index dd80aede1..1a37b9351 100644 --- a/R/slab.R +++ b/R/slab.R @@ -116,11 +116,11 @@ stop('must provide a valid formula: groups ~ var1 + var2 + ...', call. 
= FALSE) # check for bogus left/right side problems with the formula - if (any(g %in% object.names) == FALSE & g != '.') # bogus grouping column - stop('group name either missing from formula, or does match any columns in dataframe', call. = FALSE) + if (length(g) > 0 && !any(g %in% object.names) && g != '.') # bogus grouping column + stop('group name either missing from formula, or does match any columns in data.frame', call. = FALSE) if (any(vars %in% object.names) == FALSE) # bogus column names in right-hand side - stop('column names in formula do not match column names in dataframe', call. = FALSE) + stop('column names in formula do not match column names in data.frame', call. = FALSE) # make formula for slicing if (length(slab.structure) == 2) { From 3bd0fd2caefcb2d68e2620b2c4668e7d8429cbfc Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:45:42 -0700 Subject: [PATCH 10/23] standardize data.frame conversion --- R/slab.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index 1a37b9351..d8a09a7d6 100644 --- a/R/slab.R +++ b/R/slab.R @@ -218,7 +218,7 @@ # convert wide -> long format # warnings will occur when not all columns are e.g. double d.long <- suppressWarnings(data.table::melt( - as.data.table(data[which(!is.na(data$seg.label)), ]), + data.table::as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars )) @@ -307,7 +307,7 @@ attr(d.slabbed, 'original.levels') <- original.levels } - return(d.slabbed) + .as.data.frame.aqp(d.slabbed, aqp_df_class(object)) } # setup generic function From 62979200b09cac8901e84c3cd4257e277de18954 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 27 Sep 2022 22:46:09 -0700 Subject: [PATCH 11/23] use alternate name to avoid NSE collision with `"wt"` --- R/slab.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/slab.R b/R/slab.R index d8a09a7d6..ebd2e720e 100644 --- a/R/slab.R +++ b/R/slab.R @@ -236,8 +236,8 @@ } else { FUN <- slab.fun } - wt <- eval(weights) - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[wt]], ...))), by = c('variable', g, 'seg.label')]) + .internal_wt <- eval(weights) + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), by = c('variable', g, 'seg.label')]) d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 } else { @@ -246,9 +246,6 @@ slab.fun <- .slab.fun.numeric.default } - # convert back to data.frame - d.long <- as.data.frame(d.long) - # reset factor levels in d.long[[value]] if (.factorFlag) { d.long[['value']] <- factor(d.long[['value']], levels = original.levels) From 076715c6ea1572d5213042dd0289aa9b63dbe4dd Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Wed, 28 Sep 2022 08:22:29 -0700 Subject: [PATCH 12/23] Add comments for .genSlabLabels2 --- R/genSlabLabels.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/R/genSlabLabels.R b/R/genSlabLabels.R index 2d5ff125d..3e8918f3a 100644 --- a/R/genSlabLabels.R +++ b/R/genSlabLabels.R @@ -1,5 +1,15 @@ # does not assume logical horizons (may overlap, resulting in different # of slices per profile) +#' .genSlabLabels2 +#' +#' @param spc A SoilProfileCollection +#' @param diced The `dice()`-ed horizon-level data.frame corresponding to `spc` +#' @param slab.structure Slab structure, either constant thickness, or vector of boundary depths +#' @keywords internal +#' @noRd +#' @return a factor containing slab IDs, labels are the segment top and bottom separated by `"-"` .genSlabLabels2 <- function(spc, diced, slab.structure = 1) { + + # handle various allowed slab.structure specifications if (length(slab.structure) == 1) { i <- seq(from = 0, length.out = (max(spc) / slab.structure) + 1) * slab.structure } else if (length(slab.structure) > 1) { @@ -7,14 +17,22 @@ } else { stop("empty slab.structure", call. = FALSE) } + + # calculate a sequence of cumulative depths and corresponding indices for each slab j <- diff(i) idx1 <- cumsum(do.call('c', lapply(seq_along(j), function(x) rep(1, j[x])))) idx2 <- do.call('c', lapply(seq_along(j), function(x) rep(x, j[x]))) mt <- data.frame(idx1, slab_id = idx2, slab_label = paste0(i[idx2], "-", i[idx2 + 1])) hzdepb <- horizonDepths(spc)[2] colnames(mt) <- c(hzdepb, "slab_id", "slab_label") + + # merge the dice'd data.frame result with the slab ID and labels res <- merge(diced, mt, by = hzdepb, all.x = TRUE, sort = FALSE) + + # order by horizon ID and depth res <- res[order(res[[idname(spc)]], res[[hzdepb]]),] + + # return the slab IDs as a factor, with labels denoting each segment top and bottom factor(res$slab_id, labels = na.omit(unique(res$slab_label))) } From c9315513b0bcaa6f79e0a19fd220a5e2ad505382 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Tue, 23 Aug 2022 15:51:26 -0700 Subject: [PATCH 13/23] Fix segment docs --- R/segment.R | 11 +++++------ man/segment.Rd | 6 +++--- man/singlebracket.Rd | 5 +++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/segment.R b/R/segment.R index 24aa76227..7b27e0ce0 100644 --- a/R/segment.R +++ b/R/segment.R @@ -3,20 +3,19 @@ #' Segmenting of Soil Horizon Data by Depth Interval #' -#' @description This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. -#' +#' This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). 
This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. #' @param object either a `SoilProfileCollection` or `data.frame` #' @param intervals a vector of integers over which to slice the horizon data (e.g. `c(25, 100)` or `25:100`) -#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}. +#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and `trim = FALSE`. #' @param hzdepcols a character vector of length 2 specifying the names of the horizon depths (e.g. `c("hzdept", "hzdepb")`), only necessary if `object` is a `data.frame`. #' -#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. +#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. `slice()` or `slab()`. #' -#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called \code{segment_id} identifying the depth interval is added. +#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called `segment_id` identifying the depth interval is added. #' #' @author Stephen Roecker #' -#' @seealso [dice()] [glom()] +#' @seealso [dice()], [glom()] #' #' @export #' diff --git a/man/segment.Rd b/man/segment.Rd index a3833944b..bfad07f73 100644 --- a/man/segment.Rd +++ b/man/segment.Rd @@ -11,7 +11,7 @@ segment(object, intervals, trim = TRUE, hzdepcols = NULL) \item{intervals}{a vector of integers over which to slice the horizon data (e.g. \code{c(25, 100)} or \code{25:100})} -\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{`trim = FALSE`}.} +\item{trim}{logical, when \code{TRUE} horizons in \code{object} are truncated to the min/max specified in \code{intervals}. When \code{FALSE}, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and \code{trim = FALSE}.} \item{hzdepcols}{a character vector of length 2 specifying the names of the horizon depths (e.g. 
\code{c("hzdept", "hzdepb")}), only necessary if \code{object} is a \code{data.frame}.} } @@ -22,7 +22,7 @@ Either a \code{SoilProfileCollection} or \code{data.frame} with the original hor This function segments or subdivides horizon data from a \code{SoilProfileCollection} or \code{data.frame} by depth interval (e.g. \code{c(0, 10)}, \code{c(0, 50)}, or \code{25:100}). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. \code{"segment_id"}) are added to each horizon record that correspond with their depth interval (e.g. \code{025-100}). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples. } \details{ -\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice} or \code{slab}. +\code{segment()} performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. \code{slice()} or \code{slab()}. } \examples{ @@ -111,7 +111,7 @@ head(test3_agg) } \seealso{ -\code{\link[=dice]{dice()}} \code{\link[=glom]{glom()}} +\code{\link[=dice]{dice()}}, \code{\link[=glom]{glom()}} } \author{ Stephen Roecker diff --git a/man/singlebracket.Rd b/man/singlebracket.Rd index 136b2dcb4..491dcc0b2 100644 --- a/man/singlebracket.Rd +++ b/man/singlebracket.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SoilProfileCollection-operators.R -\name{[,SoilProfileCollection-method} +\name{[,SoilProfileCollection,ANY,ANY,ANY-method} +\alias{[,SoilProfileCollection,ANY,ANY,ANY-method} \alias{[,SoilProfileCollection-method} \title{Matrix/data.frame-like access to profiles and horizons in a SoilProfileCollection} \usage{ -\S4method{[}{SoilProfileCollection}(x, i, j, ..., drop = TRUE) +\S4method{[}{SoilProfileCollection,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) } \arguments{ \item{x}{a SoilProfileCollection} From 33e9851d4f9eb82cba0569b148ebdb4cd026130b Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Wed, 28 Sep 2022 08:34:15 -0700 Subject: [PATCH 14/23] Add comments for data.table weighted slab aggregation --- R/slab.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/slab.R b/R/slab.R index ebd2e720e..0e12d861d 100644 --- a/R/slab.R +++ b/R/slab.R @@ -237,8 +237,15 @@ FUN <- slab.fun } .internal_wt <- eval(weights) - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), by = c('variable', g, 'seg.label')]) - d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, by = c('variable', g, 'seg.label')]$V1 + + # calculate the summary function FUN for each variable*groups*slablabel, including weights + # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), + by = c('variable', g, 'seg.label')]) + + # calculate the contributing fraction for each variable*groups*slablabel + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, + by = c('variable', g, 'seg.label')]$V1 } else { if (missing(slab.fun) || !inherits(slab.fun, 'function')) { From 2da417adb857e60760d5d1951e3a280313310895 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 28 Sep 2022 09:53:26 -0700 Subject: [PATCH 15/23] convert to data.table - need to refactor slab functions so that a single call is possible for weighted/unweighted/numeric/factor --- R/slab.R | 93 ++++++++++++++++++++------------------------------------ 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/R/slab.R b/R/slab.R index 0e12d861d..58e2f10e1 100644 --- a/R/slab.R +++ b/R/slab.R @@ -91,9 +91,6 @@ # define keywords for data.table .SD <- NULL; .N <- NULL; value <- NULL - - # get extra arguments: length of 0 if no extra arguments - extra.args <- list(...) # get unique list of names in object object.names <- unique(unlist(names(object))) @@ -180,10 +177,6 @@ slab.fun <- .slab.fun.factor.default } - # add extra arguments required by this function - # note that we cannot accept additional arguments when processing categorical values - extra.args <- list(cpm = cpm) - # save factor levels for later original.levels <- levels(data[[vars]]) @@ -215,17 +208,13 @@ stop("column '", weights, "' not present in site data", call. = FALSE) } - # convert wide -> long format - # warnings will occur when not all columns are e.g. double + # convert wide -> long format; warnings will occur when not all columns are e.g. 
double d.long <- suppressWarnings(data.table::melt( data.table::as.data.table(data[which(!is.na(data$seg.label)), ]), id.vars = unique(c(object.ID, 'seg.label', g, weights)), measure.vars = vars )) - # make a formula for aggregate() - aggregate.fm <- as.formula(paste('value ~ seg.label + variable + ', g, sep = '')) - # check for weights if (!missing(weights)) { if (missing(slab.fun)) { @@ -236,60 +225,44 @@ } else { FUN <- slab.fun } - .internal_wt <- eval(weights) - - # calculate the summary function FUN for each variable*groups*slablabel, including weights - # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame - d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), - by = c('variable', g, 'seg.label')]) - - # calculate the contributing fraction for each variable*groups*slablabel - d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, - by = c('variable', g, 'seg.label')]$V1 - } else { if (missing(slab.fun) || !inherits(slab.fun, 'function')) { # default numeric aggregation fun is .slab.fun.numeric.default - slab.fun <- .slab.fun.numeric.default - } - - # reset factor levels in d.long[[value]] - if (.factorFlag) { - d.long[['value']] <- factor(d.long[['value']], levels = original.levels) - } - - # process chunks according to group -> variable -> segment - # NA values are not explicitly dropped - if (length(extra.args) == 0) { - d.slabbed <- aggregate(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun) + FUN <- .slab.fun.numeric.default } else { - # optionally account for extra arguments - the.args <- c(list(aggregate.fm, data = d.long, na.action = na.pass, FUN = slab.fun), extra.args) - d.slabbed <- do.call(what = 'aggregate', args = the.args) - } - - # if slab.fun returns a vector of length > 1 we must: - # convert the complex data.frame returned by aggregate into a regular data.frame - # the column 'value' is a matrix with the results of slab.fun - if (inherits(d.slabbed$value, 'matrix')) { - d.slabbed <- cbind(d.slabbed[, 1:3], d.slabbed$value) + FUN <- slab.fun } - - # ensure that the names returned from slab.fun are legal - names(d.slabbed) <- make.names(names(d.slabbed)) - - # estimate contributing fraction - d.slabbed$contributing_fraction <- aggregate( - aggregate.fm, - data = d.long, - na.action = na.pass, - FUN = function(i) { - length(na.omit(i)) / length(i) - } - )$value - } + .internal_wt <- eval(weights) + + # calculate summary function FUN for each variable*groups*slablabel + # FUN returns a (named) vector of summary statistics, which are converted to "wide" data.frame + + if (.factorFlag) { + d.long[['value']] <- factor(d.long[['value']], levels = original.levels) + } + + if (!missing(weights)) { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, .SD[[.internal_wt]], ...))), + by = c('variable', g, 'seg.label')]) + } else { + if (!missing(cpm)) { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(unclass(FUN(value, cpm = cpm, ...)))), + by = c('variable', g, 'seg.label')]) + } else { + d.slabbed <- as.data.frame(d.long[, as.data.frame(t(FUN(value, ...))), + by = c('variable', g, 'seg.label')]) + } + } + + # calculate contributing fraction for each variable*groups*slablabel + d.slabbed$contributing_fraction <- d.long[, sum(!is.na(.SD[["value"]])) / .N, + by = c('variable', g, 'seg.label')]$V1 + + # ensure that the names returned from slab.fun are legal + names(d.slabbed) <- make.names(names(d.slabbed)) + # rename default data.table value 
column names unn <- grepl("^V\\d+$", colnames(d.slabbed)) if (any(unn)) { @@ -311,7 +284,7 @@ attr(d.slabbed, 'original.levels') <- original.levels } - .as.data.frame.aqp(d.slabbed, aqp_df_class(object)) + d.slabbed } # setup generic function From df51525410bf5061ab59ce5ab40b1e7ac4ce8798 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 28 Sep 2022 10:01:48 -0700 Subject: [PATCH 16/23] Fix cpm arg in donttest perturb() example --- R/perturb.R | 2 +- man/perturb.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/perturb.R b/R/perturb.R index bbcd8b612..6a5755924 100644 --- a/R/perturb.R +++ b/R/perturb.R @@ -77,7 +77,7 @@ #' # aggregate horizonation of simulated data #' # note: set class_prob_mode=2 as profiles were not defined to a constant depth #' sim.2$name <- factor(sim.2$name) -#' a <- slab(sim.2, ~ name, class_prob_mode=2) +#' a <- slab(sim.2, ~ name, cpm=2) #' #' # convert to long format for plotting simplicity #' library(data.table) diff --git a/man/perturb.Rd b/man/perturb.Rd index 42ed23a42..df54d75e2 100644 --- a/man/perturb.Rd +++ b/man/perturb.Rd @@ -92,7 +92,7 @@ mtext( # aggregate horizonation of simulated data # note: set class_prob_mode=2 as profiles were not defined to a constant depth sim.2$name <- factor(sim.2$name) -a <- slab(sim.2, ~ name, class_prob_mode=2) +a <- slab(sim.2, ~ name, cpm=2) # convert to long format for plotting simplicity library(data.table) From 48c7cb60fd0596366c28d3588264de9fe813fa85 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 30 Sep 2022 11:30:09 -0700 Subject: [PATCH 17/23] Docs, removing old TODO --- R/slab.R | 21 ++++++++------------- man/slab-methods.Rd | 9 ++++----- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/R/slab.R b/R/slab.R index 58e2f10e1..fb3173f13 100644 --- a/R/slab.R +++ b/R/slab.R @@ -155,7 +155,7 @@ vars.numeric.test <- is.numeric(data[[vars]]) } - # sanity check: all numeric, or single character/factor + # # sanity check: all numeric, or single character/factor if (any(!vars.numeric.test) & length(vars) > 1) { stop('mixed variable types and multiple categorical variables are not currently supported in the same call to slab', call. = FALSE) } @@ -186,11 +186,6 @@ .factorFlag <- FALSE } - ## Note: use of factor labels could be slowing things down... - ## Note: this assumes ordering is correct in source data / segment labels - ## TODO: investigate use of split() to speed things up, no need to keep everything in the safe DF: - ## l <- split(data, data$seg.label, drop=FALSE) - # add segmenting label to data data$seg.label <- .genSlabLabels2(object, data, slab.structure = slab.structure) @@ -345,10 +340,10 @@ setGeneric("slab", function(object, #' @aliases slab slab2 genSlabLabels slab,SoilProfileCollection-method #' @docType methods #' @param object a SoilProfileCollection -#' @param fm A formula: either `groups ~ var1 + var2 + var3' where named +#' @param fm A formula: either `groups ~ var1 + var2 + var3` where named #' variables are aggregated within `groups' OR where named variables are -#' aggregated across the entire collection ` ~ var1 + var2 + var3'. If `groups` -#' is a factor it must not contain NA. +#' aggregated across the entire collection ` ~ var1 + var2 + var3`. If `groups` +#' is a factor it must not contain `NA` #' @param slab.structure A user-defined slab thickness (defined by an integer), #' or user-defined structure (numeric vector). See details below. 
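For context, a hedged usage sketch of the weighted aggregation pathway refactored above; the `sp4` sample data, the fabricated `wt` column, and the toy `wtd.mean.fun` summary are illustrative assumptions rather than part of the patch. The weights column is deliberately named `wt`, the case that the earlier internal rename (`.internal_wt`) is meant to support:

library(aqp)

data(sp4)
depths(sp4) <- id ~ top + bottom

# fabricated site-level weights (length = number of profiles)
sp4$wt <- runif(length(sp4), min = 1, max = 10)

# with weights, slab.fun is assumed to receive (values, weights, ...)
wtd.mean.fun <- function(v, w, ...) {
  c(wtd.mean = weighted.mean(v, w, na.rm = TRUE))
}

a.wtd <- slab(sp4, fm = ~ Ca + Mg, slab.structure = 10,
              weights = "wt", slab.fun = wtd.mean.fun)
head(a.wtd)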
#' @param strict logical: should horizons be strictly checked for @@ -356,16 +351,16 @@ setGeneric("slab", function(object, #' @param byhz logical: should horizons or whole profiles be removed by logic checks in `strict`? Default `TRUE` removes only offending horizons, `FALSE` removes whole profiles with one or more illogical horizons. #' @param slab.fun Function used to process each 'slab' of data, ideally #' returning a vector with names attribute. Defaults to a wrapper function -#' around \code{stats::quantile}. See details. +#' around `stats::quantile()`. See details. #' @param cpm Strategy for normalizing slice-wise probabilities, dividing by -#' either: number of profiles with data at the current slice (cpm=1), or by the +#' either: number of profiles with data at the current slice (`cpm=1`), or by the #' number of profiles in the collection (cpm=2). Mode 1 values will always sum #' to the contributing fraction, while mode 2 values will always sum to 1. #' @param weights Column name containing site-level weights -#' @param \dots further arguments passed to \code{slab.fun} +#' @param \dots further arguments passed to `slab.fun` #' @return Output is returned in long format, such that slice-wise aggregates #' are returned once for each combination of grouping level (optional), -#' variable described in the \code{fm} argument, and depth-wise 'slab'. +#' variable described in the `fm` argument, and depth-wise 'slab'. #' #' Aggregation of numeric variables, using the default slab function: #' \describe{ \item{variable}{The names of variables included in the call to diff --git a/man/slab-methods.Rd b/man/slab-methods.Rd index 586be840a..fb884e2dd 100644 --- a/man/slab-methods.Rd +++ b/man/slab-methods.Rd @@ -29,9 +29,8 @@ slab_function( \arguments{ \item{object}{a SoilProfileCollection} -\item{fm}{A formula: either \verb{groups ~ var1 + var2 + var3' where named variables are aggregated within }groups' OR where named variables are -aggregated across the entire collection \verb{~ var1 + var2 + var3'. If}groups` -is a factor it must not contain NA.} +\item{fm}{A formula: either \code{groups ~ var1 + var2 + var3} where named +variables are aggregated within \verb{groups' OR where named variables are aggregated across the entire collection } ~ var1 + var2 + var3\verb{. If }groups\verb{is a factor it must not contain}NA`} \item{slab.structure}{A user-defined slab thickness (defined by an integer), or user-defined structure (numeric vector). See details below.} @@ -43,10 +42,10 @@ self-consistency?} \item{slab.fun}{Function used to process each 'slab' of data, ideally returning a vector with names attribute. Defaults to a wrapper function -around \code{stats::quantile}. See details.} +around \code{stats::quantile()}. See details.} \item{cpm}{Strategy for normalizing slice-wise probabilities, dividing by -either: number of profiles with data at the current slice (cpm=1), or by the +either: number of profiles with data at the current slice (\code{cpm=1}), or by the number of profiles in the collection (cpm=2). Mode 1 values will always sum to the contributing fraction, while mode 2 values will always sum to 1.} From b3880a3cafe7c848acc8629c95e078e5c8a56b09 Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Fri, 7 Oct 2022 12:54:46 -0700 Subject: [PATCH 18/23] Fix for dice() related to SPCs with horizon designation name set --- R/dice.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/dice.R b/R/dice.R index d6ed1358b..784810f48 100644 --- a/R/dice.R +++ b/R/dice.R @@ -69,7 +69,7 @@ setGeneric("dice", function(x, if (length(fm) > 2) { stop("please provide a valid formula", call. = FALSE) } - ; + # LHS ~ RHS # LHS: "", single integer, vector of integers slices # z-index @@ -182,6 +182,11 @@ setGeneric("dice", function(x, tops <- unlist(tops$V1) bottoms <- tops + 1 + # use internal hzID if hzidname has been set + if (is.null(h.sub[[hzidn]])) { + hzidn <- "hzID" + } + # expand slice IDs (horizon IDs); used to join with horizons sliceIDs <- rep( h.sub[[hzidn]], From 26c06100b2fed8747360162e50a13338c855c03b Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 7 Oct 2022 15:46:07 -0700 Subject: [PATCH 19/23] Update slab-factor-eval.R --- misc/slab/slab-factor-eval.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/misc/slab/slab-factor-eval.R b/misc/slab/slab-factor-eval.R index 220755682..2931f404d 100644 --- a/misc/slab/slab-factor-eval.R +++ b/misc/slab/slab-factor-eval.R @@ -4,7 +4,7 @@ library(aqp) ## pre-cached/subset data # all VALID -x <- readRDS('clarksville-pedons-final.rds') +x <- readRDS('misc/slab/clarksville-pedons-final.rds') # keep track of generalized horizon names for later hz.names <- levels(x$genhz) @@ -20,7 +20,6 @@ s$genhz <- factor(s$genhz, levels = hz.names) # compute slice-wise probability: slice-wise P always sum to 1 a <- slab(x, ~ genhz, cpm = 1) - # saveRDS(a, file = 'slab-factor-1x.rds') From 1efc5478d4c2fb2e2d4cc8e7ecffa73eb816c94e Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 7 Oct 2022 15:50:17 -0700 Subject: [PATCH 20/23] Add slab-factor-2x.rds --- misc/slab/slab-factor-2x.rds | Bin 0 -> 1606 bytes misc/slab/slab-factor-eval.R | 11 +++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 misc/slab/slab-factor-2x.rds diff --git a/misc/slab/slab-factor-2x.rds b/misc/slab/slab-factor-2x.rds new file mode 100644 index 0000000000000000000000000000000000000000..4e50da9a1a2a462678ae3006ac5032d6f340f85d GIT binary patch literal 1606 zcmV-M2D$kkiwFP!000002JM__coRh!fI|-`6y*?T1rZTEP^dxyQM{-^1x1uYi;9?* zwnWkfN&qdWs1;EW6$=O+fKsY}D54@NO0ir;umvoFAU!CDKad|jkN>Fern3QNoo6>q zQ{ZvGr*Ga(x|8f~vh&T%W|Gvz;c$dIBElLt!XsI9M2;9fpj#gvyM#N@_##{d4Qy;| z&L<+me2#E5#tq%6lT+OjS!#a9qmk*UnPao1RU=c}&WZAx4bz+{UQZTFr{H3!y5pq7 z7q)HNwr$(CZKEB5=F2~e#&ub_Y>mN3TO3H7(7w=b-|du^2Jap@IJH-9so#aA!&vHV za6wH0ztnT8icc`O zNUX10bzIHb5h|}2uVdA#MY%vNw^uH3hwwbJ!wx$x{0Jyt1{;R{o2uU^*XOFdAmCQupPp3yFq2u z{v8y@Q@5(!uup8iZq@NF72B&@exGy~i{ozQE#i4~TWw#cK2L#`548hr2kBOwXI33- zdnrWYK7KE1RB&=yhbV&==X8upXr5#0CHd=|ZKuvN_0sI}#8s`P8k{%67p7j8uytf~ z$1#DrzUpT?Oru`&r5%BV<@(5Kf6!|EsH(5!e5A@-a(l&UzEib#td^I@`&Ic@Wx0H? 
zmd6{;m8~AXP+5LI<NqN73fmll!Zxeyz%KKUp>2AY0YXwpu>amd6FmtlA&5aRilv-5#^;D-rd->|ME9qMU2$ zomDxm6G@4t-X`*a?4spbtT6TV-PxYO$Jd+MYQ7BCTb(mv`SV+~f3X z+F^*G$H(M6AMAKwZO0+&%k?!LYxO#}>_Gc#R{Qt0?RN*-?>D=iQZ}uFHUew3=s)T% zpOq53x%(`GD>nB0r)#Gnrml?lg~1gfADX(2F9Djm(w8>-_e`1yy{lJ`c(#C+pv~xR1+AeC#6T=u3T>esTn6poa_9h8Ku5R| zIzeZ+3a*B0;9BSc*TMDB6}mwj+yFO1cjy7}&=Yz=Z@3Biz|C+A^o3jDHn?3o-QZ5> z2MKT&+ztI<03^aaFc1d8y>K54hWlX%JOB^EP#6ZoVFWw`4?_}+gh${}cnn6t<1iYY z04F3v3TW5(Qy~q~VJx`dNf-xi$bd}nz<8Jd+PqqA#;q47!DN^MPr=iW4O3wnJOj_d zb1)sAhZo>Q$bpw&2D}V2;T3olUV~ZiI?RSQ;7xc7=D^#K3v*!}%!hYi0ch7W-i7zz zefR(t!D3hfAHqkl6!Kshn4y)k{SOZ_eSMW8gg>PUTd<);f z_ply*fFEH46u?IK2?}8oY=$kc6}CYUY=@s=2keAjpcqPE7wm>T@GF$UUic06!G8E1 z4!}Y90}eqMltTqn!ePhhbJIRA?eo$;FVCOzvP%A$gG^^es{S(v(c;0V$<8d7GufRg z?l*P1-ErfyJZUaB>()Hhpr5|bUo3leI>CJK_ zPx87l(?_ReIa9nYPp1A1Ue9>_M;DSkUau!Z+VI9%o+)wq#%r}k*mQ2HtE=19tQc19 zIQzF7nv8LJopHQOMym8w%{*DIbXTU+9e38xLo^I + render_diff() From 72b6182543e888f67f34d144d1254375198f3817 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 7 Oct 2022 18:21:00 -0700 Subject: [PATCH 21/23] `dice()`: fillHzGaps recalculates hzID and sets it as `hzidname()` - need to recalculate local var to avoid errors --- R/dice.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/dice.R b/R/dice.R index 726c519de..7ec186614 100644 --- a/R/dice.R +++ b/R/dice.R @@ -97,6 +97,8 @@ setGeneric("dice", function(x, # must fill from min(z) --- [gaps] --- max(z) + 1 x <- fillHzGaps(x, flag = TRUE, to_top = min(z), to_bottom = max(z) + 1) + # NB: fillHzGaps will recalculate/reset hzID, so update the local var! + hzidn <- "hzID" } # check for '.' --> all variables, minus ID/depths @@ -125,7 +127,6 @@ setGeneric("dice", function(x, z <- NULL } - # time to work with horizons h <- horizons(x) @@ -172,7 +173,6 @@ setGeneric("dice", function(x, list(.SD[[htb[1]]][i] + j - 1) }) ), - # iterate over profiles in the collection (by idname) by = c(idn), @@ -182,11 +182,6 @@ setGeneric("dice", function(x, tops <- unlist(tops$V1) bottoms <- tops + 1 - # use internal hzID if hzidname has been set - if (is.null(h.sub[[hzidn]])) { - hzidn <- "hzID" - } - # expand slice IDs (horizon IDs); used to join with horizons sliceIDs <- rep( h.sub[[hzidn]], From 14b4c9a0e585686f757595b3ef24bb158411c9c6 Mon Sep 17 00:00:00 2001 From: "Andrew G. 
Brown" Date: Thu, 13 Oct 2022 11:46:29 -0700 Subject: [PATCH 22/23] slab: update cpm=2 comparison --- misc/slab/slab-factor-1x-cpm1.rds | Bin 0 -> 1605 bytes misc/slab/slab-factor-1x-cpm2.rds | Bin 0 -> 1402 bytes misc/slab/slab-factor-eval.R | 9 +++++++++ 3 files changed, 9 insertions(+) create mode 100644 misc/slab/slab-factor-1x-cpm1.rds create mode 100644 misc/slab/slab-factor-1x-cpm2.rds diff --git a/misc/slab/slab-factor-1x-cpm1.rds b/misc/slab/slab-factor-1x-cpm1.rds new file mode 100644 index 0000000000000000000000000000000000000000..9e8c74a7ef75b600a01b8008404e180ecba6e53a GIT binary patch literal 1605 zcmV-L2DYX3RO-~yr@D2MU+E}ikOzP zMA8OI04=Dfh^UB)1q2U3DOEreQ4tlTSgs;i4vQd256a;W><^#Ef7Ex=*#NW7vzvw# zc--&ln|G7$B)i-0d^3}sO{%YHT5T;ls+LwehD9xA*wB8Rd-2*OJZXS0!sSrQ!NK8t zB04JIXssTO^|~jzz2jMGeA=rqY3_{CS<#daDl6Zu+p2^&euH~EnME^&oH?-%U^+46DN9wL+-`fsX!qg@`9XCRQYL- z_`Tu1;&WuH`o+b8U$+dpbb+4ea!r=|*>>9la^APf|FVzhyVYY((5CGWmfH;~tM>1p zIG%=8?S_3~`wgp(cahj$!}8zBaG^NvR^BY$*Rb98h3fw))bf#bsO=!bs`JdQ!)-4` zXxu00g>~{OrnIeV@}g<&>LxUvX6ePbYhA4-&$aZDtdhi)%_f_iGb|96UYf9Fc!PGM zLUm2m&rVoIjpj=yLJQmVk=_2F-TF~gU(5MOmAB;firsvtYVX)BFOT=D@~_Hr`EV_d zH=HZmJ${k0{P&dqkHU7}o&FJV{Y=i!={R6;c23aiMShf>C-R|{^F@A?y;0;tD{m5c zFjTAdFONqStL+W_JIv-M=(Y3pSa7y{uItYTgp)5L7|Ql=b;k{BDi5k`w|=xcE^U?X zLd|<}e^u45Rax#QtHv8-tNPh?%SYPsxPX;a`(rhZpmMm|W3_!dMg1>(SGJxg=U942 z`83aoq(n<^75PAR!7@FTTYB5>EZ@N6>nv?IUxw=~uIVlL30k#()pen$2P5nn3@2YY zVT!QF$K*U8?s#Bz$02LVH8mb<_j_*Hq4wA8_V26P?+&%!Z}ojj@szgu2&~zn|7f^m zW=hLVU1pkGx}p0&o!SqwbXm7Rm|QCIp`}{{5}>8a0%@~X(1!!*e}u{9RSkpcTl9Mk zA4v_wS@SVbpG4XfHDkh^ciZ+pgFXFIA{r%LMvzumq8o29NNMa&2-57zPi)!;l2S;SqQg9)l6^IE;iRzy-;W0{V9rZb*eR7!4kH62^cR z(jfzUFc!vvK2J!WLF9)CFcBufQ}8ro!DN^M&%m?r9887h;RSdRrol@v9bSeR@Cv*N zufa@s9cIBB@Fu(kv*B&XhB+`7=D|BKAM`Prci}yFA3lJEum~2zhwu?BfgD&0x$rT3 z0?S}Itbk8pC42^-!z%a!R>PO@6?_eA;2T&A-@wd9VR~f_&Hrn_x3+ zfvr#g+u&!|4m;o%D1@D`3wFaE_!Wv^FZ>4kU_bm02jC$50f(R%N}v?V;IMY;+;q-M z=e%^z%k$^F)X&MPc_n}8t+lv9OIK)taO~Kz*!V2zsZ)b-&a|6JzR+x2e5RT z|Gp02%Eh8zf3|%{)a!=B9WR$1?)Xx;*QK51>(u1i9AvoC-NtPW8i*I`PIP5@T*+Ry zc;3+E^~R6Q^rd>dtXuV3tv<#=U$Ia-!QbW7Vprn>lDa1Ny9J+gOYnC$R`UDC8uwL5 z_WAw3bn&ctiZ8>T=}Dg8_hh7vOwDwq_&vT1X~XMf`X_A!0+^)9BN7G&-=Kns)MELc~AuJVpQj D4+J|^ literal 0 HcmV?d00001 diff --git a/misc/slab/slab-factor-1x-cpm2.rds b/misc/slab/slab-factor-1x-cpm2.rds new file mode 100644 index 0000000000000000000000000000000000000000..2fbc0f231638c1a7f6009e98d9e24e226d202877 GIT binary patch literal 1402 zcmV-=1%>(_iwFP!000002JPHwcoRh&$MNkcP!vHC6zhoxDpgvFC|<1sUU(I>c-6F| zC6Xp>2(6+h-gx0vMDf0;2Ofxu7b;pst7s8y1;HLrQC}!8d`j9ru8JL$)QX& zdXwp1ca_^aovFRIyEd)N?VCDNnl-J&>zXdlnO^ED3Hk#}ZN*+_KYfc4YC3i5)TvXa z&d4;Zdii(Wth9dyHQ45#_0xteTDT-4H}_vZzB02eoT0zf10r(u6 zDk7J6E)dS84MpY}L}aa$^W&3-Zo28Fn{K;%3)M@fGb`)cDpA+0Y*l1?O=y8|ZWeXP zE`OifHfGLWLnHE!P}waAr)v34Vm+Gi2qyJB<@E})wsAN^zXspY+GO!y4r}8$JtK%19IKBtN(KTWJpv zKHjgYzp9j%k2U4v4Ld8HK7a8_`Sq0FN3o{U{za_)hMtz)JuZ}><3&4r{~3p->t!69 zvftR<_hF*ypemj2ADteTwySs1);)Q=+9cN7Y0Bed)$s;7RpV@@%f~zA;{tZ6dOUW= z5mXuL`q-_X{P{+->GboE?vC#heLSG2UgZv z{kojdj@Rt^HFo_QyNq_+Z})kMs!ltd&uPm2F}u$Zx_kVhyZysxGuGF^E_d|!H&Nqf zbzQd3A~TzL_KJBrN(xe8o_CgxJ5Scg#2jdVNioAuxi}B!V*)O~g}4Y8;}R5LA|~NdT!unS z#^tyISK=yMjUrqF7m87WDR83{Wta*NuEjKXQ4Sycs6Zvm^MuSZh=Q1bDpcb-T#uQ! 
z0XJe6Zog0<01x6JJd8(BgL!xq^YIuS#{xWo zC-D@X#xr;p&*6EzfEV!+7NQm};}yJ$*RTk$;|;utx3Cy*V+r2DyLb;v@jgDlhxiB| zV;Mfdr}zxZ@j1Rg9lpd@Sb?wc4eIeNzQamg+M8`X0UN`)_i}-``iI z>icl5VjN?eng9Jge7kHBS6 zSHR;c_PWL8zAmpft0Lep^>~@L^SY!wYhtXJNXZY5*fyDM-9ce?elRC|AtygL(wZsg zudsesq1Yb``pdz>gNx5$~-=oH>=CfL!?LfNK?0apsAt9X&77o1 Date: Thu, 13 Oct 2022 11:51:51 -0700 Subject: [PATCH 23/23] slab: add cpm=2 comparison --- misc/slab/slab-factor-2x-cpm1.rds | Bin 0 -> 1606 bytes misc/slab/slab-factor-2x-cpm2.rds | Bin 0 -> 1399 bytes misc/slab/slab-factor-eval.R | 25 ++++++++----------------- 3 files changed, 8 insertions(+), 17 deletions(-) create mode 100644 misc/slab/slab-factor-2x-cpm1.rds create mode 100644 misc/slab/slab-factor-2x-cpm2.rds diff --git a/misc/slab/slab-factor-2x-cpm1.rds b/misc/slab/slab-factor-2x-cpm1.rds new file mode 100644 index 0000000000000000000000000000000000000000..4e50da9a1a2a462678ae3006ac5032d6f340f85d GIT binary patch literal 1606 zcmV-M2D$kkiwFP!000002JM__coRh!fI|-`6y*?T1rZTEP^dxyQM{-^1x1uYi;9?* zwnWkfN&qdWs1;EW6$=O+fKsY}D54@NO0ir;umvoFAU!CDKad|jkN>Fern3QNoo6>q zQ{ZvGr*Ga(x|8f~vh&T%W|Gvz;c$dIBElLt!XsI9M2;9fpj#gvyM#N@_##{d4Qy;| z&L<+me2#E5#tq%6lT+OjS!#a9qmk*UnPao1RU=c}&WZAx4bz+{UQZTFr{H3!y5pq7 z7q)HNwr$(CZKEB5=F2~e#&ub_Y>mN3TO3H7(7w=b-|du^2Jap@IJH-9so#aA!&vHV za6wH0ztnT8icc`O zNUX10bzIHb5h|}2uVdA#MY%vNw^uH3hwwbJ!wx$x{0Jyt1{;R{o2uU^*XOFdAmCQupPp3yFq2u z{v8y@Q@5(!uup8iZq@NF72B&@exGy~i{ozQE#i4~TWw#cK2L#`548hr2kBOwXI33- zdnrWYK7KE1RB&=yhbV&==X8upXr5#0CHd=|ZKuvN_0sI}#8s`P8k{%67p7j8uytf~ z$1#DrzUpT?Oru`&r5%BV<@(5Kf6!|EsH(5!e5A@-a(l&UzEib#td^I@`&Ic@Wx0H? zmd6{;m8~AXP+5LI<NqN73fmll!Zxeyz%KKUp>2AY0YXwpu>amd6FmtlA&5aRilv-5#^;D-rd->|ME9qMU2$ zomDxm6G@4t-X`*a?4spbtT6TV-PxYO$Jd+MYQ7BCTb(mv`SV+~f3X z+F^*G$H(M6AMAKwZO0+&%k?!LYxO#}>_Gc#R{Qt0?RN*-?>D=iQZ}uFHUew3=s)T% zpOq53x%(`GD>nB0r)#Gnrml?lg~1gfADX(2F9Djm(w8>-_e`1yy{lJ`c(#C+pv~xR1+AeC#6T=u3T>esTn6poa_9h8Ku5R| zIzeZ+3a*B0;9BSc*TMDB6}mwj+yFO1cjy7}&=Yz=Z@3Biz|C+A^o3jDHn?3o-QZ5> z2MKT&+ztI<03^aaFc1d8y>K54hWlX%JOB^EP#6ZoVFWw`4?_}+gh${}cnn6t<1iYY z04F3v3TW5(Qy~q~VJx`dNf-xi$bd}nz<8Jd+PqqA#;q47!DN^MPr=iW4O3wnJOj_d zb1)sAhZo>Q$bpw&2D}V2;T3olUV~ZiI?RSQ;7xc7=D^#K3v*!}%!hYi0ch7W-i7zz zefR(t!D3hfAHqkl6!Kshn4y)k{SOZ_eSMW8gg>PUTd<);f z_ply*fFEH46u?IK2?}8oY=$kc6}CYUY=@s=2keAjpcqPE7wm>T@GF$UUic06!G8E1 z4!}Y90}eqMltTqn!ePhhbJIRA?eo$;FVCOzvP%A$gG^^es{S(v(c;0V$<8d7GufRg z?l*P1-ErfyJZUaB>()Hhpr5|bUo3leI>CJK_ zPx87l(?_ReIa9nYPp1A1Ue9>_M;DSkUau!Z+VI9%o+)wq#%r}k*mQ2HtE=19tQc19 zIQzF7nv8LJopHQOMym8w%{*DIbXTU+9e38xLo^Ic-6F| zC6Xp>2(6+h-gx0vMDf1B0}n*S3l*)RRkR4Uf?yA*s4tWkzNUURoeeN?p4}uQ6q$LR zPk)CXI{<_Cd9Y28VBYADl< zo@A!iUE}snWoqwjuFa@$`zBA9X3Z$`x~9r=W|q6kg8l$gTd)^8PTy>VnogZMb?Vfq zGcpaUUjAJ$BjcX|jkft`!;~Ql=P!=PO?}smth3ci#nU(czrKoFGwkfisF0?>6H;KAr zm%q<$A2sW*!4dgKsO%PnQ?>j?v7T0L5cOKlHdd07ACWDhzhLEB(NB<5ejZkY7O#%T z1~I>ts^`_NAEC;n;(nawBC%YwDfd@)@l4=$rkifM?fNaMdKqtv7{95;jq?6nRTt!X zqN-1;#s95r7WX5kYFykJ`uD(aw$C#Q-GYde$JtK%19IKBtN(KTW5{U9q< z+nLjhx4)F2<377fCTst!n{7*Kz0@tb=6HYPbUf&E|EStu%k@ZAx8(kc(|V`s?>Jpv zKHjgYzp9j%k2mGx4Ld8HK7WZy`Sq0FNAaf9{zbg~hVGWJ9v4c|@uHo*|4cyB^)i7? 
z*>8;XeVD8|s7j~%N2kZ7?dn~$bx$6zHjDLkn({bVb-Y1N)i~Sf@`+CQxPV=%9*^B| z1XaenK6dLTf4&iII{iE(*71F^j|X&@{Qr%=b-mOrqQ(6_mg{`H+g-BuQ?X{Uj>kIP zuge+jc+IX~W7og2%V@{_cAuxH>a^4OoTl6#v-=z&*5enk_79`YcwYy*+|lFTWR0KI zbve54$8Hw;Q=K|>>g+D5`aYoQb9_1F?;)yE^|^kV*dNO7 z59S%&`=USg!vGA#{x|@Ga3HdA5Dvy6I24EBa16!~I1)$UXbizII2OlYC~|N-PQWna zA`ionj}bT#BXJT=#weVEQ*oNPx#0{Hpb%%`ER4n&jK$eF2j}8EoR4w102ksST#QRl zgz=bwOK};BF%g&J3S5b+a5YMB4O}Qi879Gva#Ub4Jh&E9;6){T@S_UVFwd(s&$ta@ z8fs9B>u^1$;|AP_8Mp~IV_!vv^2|mSVSccE>1?ur7zQS^Rjc?F^Z}A;g;CuXlMy$k-ScTR22~B9m z&sc-C_yy~*9>1amt!P6#HejQ%b=%b2rQR;}cG-Ql%MSTx4t%akxAijzS>nnbHLifi zRqAz%%e`G*Z%$RfU+(cTZ`XAx1=hr9F_Bgn9JX~b*Sdq^+`?d9_(EP`aJab8zs&Co z20W$Hf*xN*NqN9k7WDXi)-446Ro0I#l=_1~f2FkIJp=yQ9Bbvxy+-Ox{&aSB4(ggQ zwbR)Cw;Os*as^#E+)brhx~V??fTzOab9r;N`+11WC?9G1HV-sC^f(P;%fG6;gOfZ) F0013m*W&;H literal 0 HcmV?d00001 diff --git a/misc/slab/slab-factor-eval.R b/misc/slab/slab-factor-eval.R index 0bf44c849..8135e3868 100644 --- a/misc/slab/slab-factor-eval.R +++ b/misc/slab/slab-factor-eval.R @@ -6,18 +6,6 @@ library(aqp) # all VALID x <- readRDS('misc/slab/clarksville-pedons-final.rds') -# keep track of generalized horizon names for later -hz.names <- levels(x$genhz) - -# slice out color and horizon name into 1cm intervals: no aggregation -max.depth <- 180 -slice.resolution <- 1 -slice.vect <- seq(from = 0, to = max.depth, by = slice.resolution) -s <- slice(x, slice.vect ~ genhz) - -# convert horizon name to factor -s$genhz <- factor(s$genhz, levels = hz.names) - # compute slice-wise probability: slice-wise P always sum to 1 a <- slab(x, ~ genhz, cpm = 1) @@ -26,13 +14,11 @@ a <- slab(x, ~ genhz, cpm = 1) a.1 <- slab(x, ~ genhz, cpm = 1) a.2 <- slab(x, ~ genhz, cpm = 2) + # saveRDS(a.1, file = 'misc/slab/slab-factor-1x-cpm1.rds') # saveRDS(a.2, file = 'misc/slab/slab-factor-1x-cpm2.rds') -saveRDS(a.1, file = 'misc/slab/slab-factor-2x-cpm1.rds') -saveRDS(a.2, file = 'misc/slab/slab-factor-2x-cpm2.rds') - -# saveRDS(a, file = 'misc/slab/slab-factor-1x-cpm2.rds') -# saveRDS(a, file = 'misc/slab/slab-factor-2x.rds') +# saveRDS(a.1, file = 'misc/slab/slab-factor-2x-cpm1.rds') +# saveRDS(a.2, file = 'misc/slab/slab-factor-2x-cpm2.rds') library(daff) @@ -40,3 +26,8 @@ d1 <- readRDS('misc/slab/slab-factor-1x.rds') d2 <- readRDS('misc/slab/slab-factor-2x.rds') diff_data(d1,d2) |> render_diff() + +d1 <- readRDS('misc/slab/slab-factor-1x-cpm2.rds') +d2 <- readRDS('misc/slab/slab-factor-2x-cpm2.rds') +diff_data(d1,d2) |> + render_diff()