From 529aa5c647966a52a7ce32f5e03e55f24ca46287 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 29 Jun 2022 09:07:23 -0700 Subject: [PATCH 1/9] Export `get_NASIS_metadata()` / Add `get_NASIS_column_metadata()` --- NAMESPACE | 2 + R/uncode.R | 120 ++++++++++++++++++++++++++------------ man/get_NASIS_metadata.Rd | 39 +++++++++++++ 3 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 man/get_NASIS_metadata.Rd diff --git a/NAMESPACE b/NAMESPACE index 88dc867e..563b9137 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,7 +41,9 @@ export(format_SQL_in_statement) export(getHzErrorsNASIS) export(getHzErrorsPedonPC) export(get_EDIT_ecoclass_by_geoUnit) +export(get_NASIS_column_metadata) export(get_NASIS_fkey_by_name) +export(get_NASIS_metadata) export(get_NASIS_pkey_by_name) export(get_NASIS_pkeyref_by_name) export(get_NASIS_table_key_by_name) diff --git a/R/uncode.R b/R/uncode.R index 656eb0f1..636ce8a8 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -77,20 +77,7 @@ uncode <- function(df, NASISDomainsAsFactor(stringsAsFactors) } - # load current metadata table - if (local_NASIS_defined(dsn = dsn)) { - - # cache NASIS metadata in soilDB.env within an R session - if (!exists("NASIS.metadata", envir = soilDB.env)) { - metadata <- .get_NASIS_metadata(dsn = dsn) - assign('NASIS.metadata', value = metadata, envir = soilDB.env) - } else { - metadata <- get("NASIS.metadata", envir = soilDB.env) - } - - } else { - load(system.file("data/metadata.rda", package = "soilDB")[1]) - } + metadata <- get_NASIS_metadata(dsn = dsn) # unique set of possible columns that will need replacement metadata_col <- names(metadata)[grep("ColumnPhysicalName", names(metadata), ignore.case = TRUE)] @@ -140,29 +127,6 @@ uncode <- function(df, return(df) } -.get_NASIS_metadata <- function(dsn = NULL) { - - q <- "SELECT mdd.DomainID, DomainName, DomainRanked, DisplayLabel, - ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, - ColumnPhysicalName, ColumnLogicalName - FROM MetadataDomainDetail mdd - INNER JOIN MetadataDomainMaster mdm ON mdm.DomainID = mdd.DomainID - INNER JOIN (SELECT MIN(DomainID) DomainID, MIN(ColumnPhysicalName) ColumnPhysicalName, MIN(ColumnLogicalName) ColumnLogicalName - FROM MetadataTableColumn GROUP BY DomainID, ColumnPhysicalName) mtc ON mtc.DomainID = mdd.DomainID - ORDER BY mdd.DomainID, ColumnPhysicalName, ChoiceValue;" - - channel <- dbConnectNASIS(dsn) - - if (inherits(channel, 'try-error')) - return(data.frame()) - - # exec query - d <- dbQueryNASIS(channel, q) - - # done - return(d) -} - # convenient, inverted version of uncode() #' @export #' @rdname uncode @@ -195,3 +159,85 @@ NASISDomainsAsFactor <- function(x = NULL) { invisible(getOption("soilDB.NASIS.DomainsAsFactor", default = FALSE)) } +#' Get NASIS Metadata (Domain, Column and Choice Lists) +#' +#' Retrieve a table containing domain and column names with choice list labels/names/sequences/values from the NASIS 7 metadata tables. +#' +#' These data are derived from the MetadataDomainDetail, MetadataDomainMaster, and MetadataTableColumn tables and help with mapping between values stored in the NASIS database and human-readable values. The human-readable values align with the values returned in public facing interfaces such as SSURGO via Soil Data Access and NASIS Web Reports. The data in these tables can also be used to create _ordered_ factors where options for levels of a particular data element follow a logical `ChoiceSequence`. +#' +#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` +#' +#' @details If a local NASIS instance is set up, and this is the first time `get_NASIS_metadata()` has been called, the metadata will be obtained from the NASIS local database. Subsequent runs in the same session will use a copy of the data object `NASIS.metadata` cached in `soilDB.env`. +#' +#' For users without a local NASIS instance, a cached copy of the NASIS metadata are used `(data/metadata.rda)`. +#' +#' See `?soilDB::metadata` for additional details. +#' +#' @return a `data.frame` containing DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, ColumnPhysicalName, ColumnLogicalName +#' @export +#' +#' @examples +#' get_NASIS_metadata() +get_NASIS_metadata <- function(dsn = NULL) { + + .doQuery <- function(dsn){ + q <- "SELECT mdd.DomainID, DomainName, DomainRanked, DisplayLabel, + ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, + ColumnPhysicalName, ColumnLogicalName + FROM MetadataDomainDetail mdd + INNER JOIN MetadataDomainMaster mdm ON mdm.DomainID = mdd.DomainID + INNER JOIN (SELECT MIN(DomainID) DomainID, MIN(ColumnPhysicalName) ColumnPhysicalName, MIN(ColumnLogicalName) ColumnLogicalName + FROM MetadataTableColumn GROUP BY DomainID, ColumnPhysicalName) mtc ON mtc.DomainID = mdd.DomainID + ORDER BY mdd.DomainID, ColumnPhysicalName, ChoiceValue;" + + channel <- dbConnectNASIS(dsn) + + if (inherits(channel, 'try-error')) + return(data.frame()) + + # exec query + dbQueryNASIS(channel, q) + } + + # load current metadata table + if (local_NASIS_defined(dsn = dsn)) { + + # cache NASIS metadata in soilDB.env within an R session + if (!exists("NASIS.metadata", envir = soilDB.env)) { + metadata <- .doQuery(dsn = dsn) + assign('NASIS.metadata', value = metadata, envir = soilDB.env) + } else { + metadata <- get("NASIS.metadata", envir = soilDB.env) + } + + } else { + load(system.file("data/metadata.rda", package = "soilDB")[1]) + } + +} + +#' Get NASIS metadata entries for specific domains or choices +#' +#' @param x character vector to match in NASIS metadata +#' @param what Column to match `x` against. Default "ColumnPhysicalName"; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` +#' @return a `data.frame` containing selected NASIS metadata sorted first on `DomainID` and then on `ChoiceSequence` +#' @export +#' @rdname get_NASIS_metadata +#' @examples +#' get_NASIS_column_metadata("texcl") +get_NASIS_column_metadata <- function(x, + what = "ColumnPhysicalName", + dsn = NULL) { + metadata <- get_NASIS_metadata(dsn = dsn) + mds <- metadata[metadata[[what]] %in% x, ] + mds <- mds[order(mds$DomainID, mds$ChoiceSequence), ] + mds +} + +#' @keywords internal +#' @noRd +.get_NASIS_metadata <- function(dsn = NULL) { + # for backward compatibility or anyone who is using the .get method in the wild + .Deprecated("get_NASIS_metadata") + get_NASIS_metadata(dsn) +} diff --git a/man/get_NASIS_metadata.Rd b/man/get_NASIS_metadata.Rd new file mode 100644 index 00000000..f92f9146 --- /dev/null +++ b/man/get_NASIS_metadata.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/uncode.R +\name{get_NASIS_metadata} +\alias{get_NASIS_metadata} +\alias{get_NASIS_column_metadata} +\title{Get NASIS Metadata (Domain, Column and Choice Lists)} +\usage{ +get_NASIS_metadata(dsn = NULL) + +get_NASIS_column_metadata(x, what = "ColumnPhysicalName", dsn = NULL) +} +\arguments{ +\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} + +\item{x}{character vector to match in NASIS metadata} + +\item{what}{Column to match \code{x} against. Default "ColumnPhysicalName"; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} +} +\value{ +a \code{data.frame} containing DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, ColumnPhysicalName, ColumnLogicalName + +a \code{data.frame} containing selected NASIS metadata sorted first on \code{DomainID} and then on \code{ChoiceSequence} +} +\description{ +Retrieve a table containing domain and column names with choice list labels/names/sequences/values from the NASIS 7 metadata tables. +} +\details{ +These data are derived from the MetadataDomainDetail, MetadataDomainMaster, and MetadataTableColumn tables and help with mapping between values stored in the NASIS database and human-readable values. The human-readable values align with the values returned in public facing interfaces such as SSURGO via Soil Data Access and NASIS Web Reports. The data in these tables can also be used to create \emph{ordered} factors where options for levels of a particular data element follow a logical \code{ChoiceSequence}. + +If a local NASIS instance is set up, and this is the first time \code{get_NASIS_metadata()} has been called, the metadata will be obtained from the NASIS local database. Subsequent runs in the same session will use a copy of the data object \code{NASIS.metadata} cached in \code{soilDB.env}. + +For users without a local NASIS instance, a cached copy of the NASIS metadata are used \code{(data/metadata.rda)}. + +See \code{?soilDB::metadata} for additional details. +} +\examples{ +get_NASIS_metadata() +get_NASIS_column_metadata("texcl") +} From 732255b74af0f1547cfbb243c3ea7a64252d992a Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 29 Jun 2022 10:54:30 -0700 Subject: [PATCH 2/9] Fix `.cosoilmoist_prep()` RE: #241, #242, #253 --- R/uncode.R | 37 ++++++++++++++++++++++------- R/utils.R | 69 +++++++++++++++++++++++++++--------------------------- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/R/uncode.R b/R/uncode.R index 636ce8a8..b8901099 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -98,16 +98,37 @@ uncode <- function(df, if (!invert) { # replace values with ChoiceName, try filling NA with replace based on ChoiceLabel - nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]]) - lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]]) - nc[is.na(nc)] <- lc[is.na(nc)] - df[[i]] <- nc - } else { + # do not explicitly set `levels` if none of the values in value_col (numeric) are present + if (any(df[[i]] %in% sub[[value_col]])) { + nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]]) + lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]]) + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } + df[[i]] <- nc + } else { + nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[name_col]]) + lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[label_col]]) + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } + } + } else if (invert) { # replace values with ChoiceName, try filling NA with replace based on ChoiceLabel nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[value_col]]) lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[value_col]]) - nc[is.na(nc)] <- lc[is.na(nc)] - df[[i]] <- nc + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } } } @@ -219,7 +240,7 @@ get_NASIS_metadata <- function(dsn = NULL) { #' Get NASIS metadata entries for specific domains or choices #' #' @param x character vector to match in NASIS metadata -#' @param what Column to match `x` against. Default "ColumnPhysicalName"; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` +#' @param what Column to match `x` against. Default `"ColumnPhysicalName"`; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` #' @return a `data.frame` containing selected NASIS metadata sorted first on `DomainID` and then on `ChoiceSequence` #' @export #' @rdname get_NASIS_metadata diff --git a/R/utils.R b/R/utils.R index 982821d1..fd3a7631 100644 --- a/R/utils.R +++ b/R/utils.R @@ -734,47 +734,48 @@ orig_names <- names(df) # relabel names - names(df) <- gsub("^soimoist", "", names(df)) - old_names <- "stat" - new_names <- "status" - names(df)[names(df) %in% old_names] <- new_names - + # names(df) <- gsub("^soimoist", "", names(df)) + # old_names <- "stat" + # new_names <- "status" + # names(df)[names(df) %in% old_names] <- new_names # setting frequency levels and order + + # NOTE: the next block of code require factor levels be set, regardless of package options # NOTE: the SDA domains for flooding and ponding have different levels, and "Common" is obsolete - # TODO: replace with ordering derived from NASIS ChoiceSequence - flod_lev <- factor(df$flodfreqcl, levels = c("None", "Very rare", "Rare", "Occasional", "Common", "Frequent", "Very frequent")) - pond_lev <- factor(df$pondfreqcl, levels = c("None", "Rare", "Occasional", "Common", "Frequent")) + # .:. ordering implied in get_NASIS_column_metadata result from NASIS ChoiceSequence + flod_lev <- factor(df$flodfreqcl, levels = get_NASIS_column_metadata("flodfreqcl")$ChoiceLabel) + pond_lev <- factor(df$pondfreqcl, levels = get_NASIS_column_metadata("pondfreqcl")$ChoiceLabel) + mois_lev <- factor(df$soimoiststat, levels = get_NASIS_column_metadata("soimoiststat")$ChoiceLabel) + # impute NA freqcl values, default = "not populated" - if (impute == TRUE) { + if (impute) { missing <- "Not populated" - lev_flodfreqcl <- c(missing, levels(df$flodfreqcl)) - lev_pondfreqcl <- c(missing, levels(df$pondfreqcl)) - lev_status <- c(missing, levels(df$status)) - - df <- within(df, { - # replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL - dept_r[is.na(dept_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201 - depb_r[is.na(depb_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201 - - # replace NULL L and H depths with the RV - dept_l = ifelse(is.na(dept_l), dept_r, dept_l) - dept_h = ifelse(is.na(dept_h), dept_r, dept_h) - - depb_l = ifelse(is.na(depb_l), depb_r, depb_l) - depb_h = ifelse(is.na(depb_h), depb_r, depb_h) - - # replace NULL freqcl with "Not_Populated" - status = factor(levels(status)[status], levels = lev_status) - flodfreqcl = factor(levels(flodfreqcl)[flodfreqcl], levels = lev_flodfreqcl) - pondfreqcl = factor(levels(pondfreqcl)[pondfreqcl], levels = lev_flodfreqcl) - - status[is.na(status)] <- missing - flodfreqcl[is.na(flodfreqcl)] <- missing - pondfreqcl[is.na(pondfreqcl)] <- missing - }) + lev_flodfreqcl <- c(missing, levels(flod_lev)) + lev_pondfreqcl <- c(missing, levels(pond_lev)) + lev_status <- c(missing, levels(mois_lev)) + + # replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL + df$soimoistdept_r[is.na(df$soimoistdept_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201 + df$soimoistdepb_r[is.na(df$soimoistdepb_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201 + + # replace NULL L and H depths with the RV + df$soimoistdept_l <- ifelse(is.na(df$soimoistdept_l), df$soimoistdept_r, df$soimoistdept_l) + df$soimoistdept_h <- ifelse(is.na(df$soimoistdept_h), df$soimoistdept_r, df$soimoistdept_h) + df$soimoistdepb_l <- ifelse(is.na(df$soimoistdepb_l), df$soimoistdepb_r, df$soimoistdepb_l) + df$soimoistdepb_h <- ifelse(is.na(df$soimoistdepb_h), df$soimoistdepb_r, df$soimoistdepb_h) + + # relevel factors with "Not populated" as first level + df$soimoiststat <- factor(as.character(df$soimoiststat), levels = lev_status) + df$flodfreqcl <- factor(as.character(df$flodfreqcl), levels = lev_flodfreqcl) + df$pondfreqcl <- factor(as.character(df$pondfreqcl), levels = lev_flodfreqcl) + + # replace NULL moist state and frequency class with "Not populated" + df$soimoiststat[is.na(df$soimoiststat)] <- missing + df$flodfreqcl[is.na(df$flodfreqcl)] <- missing + df$pondfreqcl[is.na(df$pondfreqcl)] <- missing } # convert factors to strings From 163c295829fa86a57688441389496301250ae2d0 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 29 Jun 2022 12:44:23 -0700 Subject: [PATCH 3/9] Fix `get_NASIS_metadata()` on devices without local NASIS --- R/uncode.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/uncode.R b/R/uncode.R index b8901099..016155fb 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -201,6 +201,8 @@ NASISDomainsAsFactor <- function(x = NULL) { #' get_NASIS_metadata() get_NASIS_metadata <- function(dsn = NULL) { + metadata <- NULL + .doQuery <- function(dsn){ q <- "SELECT mdd.DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, @@ -231,10 +233,13 @@ get_NASIS_metadata <- function(dsn = NULL) { metadata <- get("NASIS.metadata", envir = soilDB.env) } - } else { + } + + if (is.null(metadata) || (is.data.frame(metadata) && nrow(metadata) == 0)) { load(system.file("data/metadata.rda", package = "soilDB")[1]) } - + + metadata } #' Get NASIS metadata entries for specific domains or choices From e75834250ed9d9a756fb87c8a5eb258829675250 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 29 Jun 2022 13:37:35 -0700 Subject: [PATCH 4/9] uncode/code: use `type.convert()` not `as.character()`for converting from factor - uncode: Use `NASISDomainsAsFactor()` to check package option --- R/uncode.R | 13 +++++++------ man/get_NASIS_metadata.Rd | 2 +- man/uncode.Rd | 2 +- tests/testthat/test-uncode.R | 25 +++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/test-uncode.R diff --git a/R/uncode.R b/R/uncode.R index 016155fb..09c4c9b5 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -138,11 +138,12 @@ uncode <- function(df, df <- droplevels(df, except = idx) } - # convert factors to strings, check soilDB option first - if ((length(stringsAsFactors) > 0 && !stringsAsFactors) || - !getOption("soilDB.NASIS.DomainsAsFactor", default = FALSE)) { + # convert factors to strings/numeric, check soilDB option first + if (invert || (length(stringsAsFactors) > 0 && !stringsAsFactors) || !NASISDomainsAsFactor()) { idx <- unlist(lapply(df, is.factor)) - df[idx] <- lapply(df[idx], as.character) + df[idx] <- lapply(df[idx], function(x) { + type.convert(x, as.is = TRUE) + }) } return(df) @@ -152,11 +153,11 @@ uncode <- function(df, #' @export #' @rdname uncode code <- function(df, - db = "NASIS", + db = NULL, droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) { - res <- uncode(df, invert = TRUE, db = db, droplevels = droplevels, stringsAsFactors = stringsAsFactors, dsn = dsn) + res <- uncode(df, invert = TRUE, droplevels = droplevels, stringsAsFactors = stringsAsFactors, dsn = dsn) return(res) } diff --git a/man/get_NASIS_metadata.Rd b/man/get_NASIS_metadata.Rd index f92f9146..7ed0b9c9 100644 --- a/man/get_NASIS_metadata.Rd +++ b/man/get_NASIS_metadata.Rd @@ -14,7 +14,7 @@ get_NASIS_column_metadata(x, what = "ColumnPhysicalName", dsn = NULL) \item{x}{character vector to match in NASIS metadata} -\item{what}{Column to match \code{x} against. Default "ColumnPhysicalName"; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} +\item{what}{Column to match \code{x} against. Default \code{"ColumnPhysicalName"}; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} } \value{ a \code{data.frame} containing DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, ColumnPhysicalName, ColumnLogicalName diff --git a/man/uncode.Rd b/man/uncode.Rd index 164c79e9..09cb50c9 100644 --- a/man/uncode.Rd +++ b/man/uncode.Rd @@ -14,7 +14,7 @@ uncode( dsn = NULL ) -code(df, db = "NASIS", droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) +code(df, db = NULL, droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) } \arguments{ \item{df}{data.frame} diff --git a/tests/testthat/test-uncode.R b/tests/testthat/test-uncode.R new file mode 100644 index 00000000..bf2c5d85 --- /dev/null +++ b/tests/testthat/test-uncode.R @@ -0,0 +1,25 @@ +test_that("uncode() works", { + x <- data.frame(texcl = 1:10) + expect_equal(uncode(x)$texcl, c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) +}) + +test_that("uncode() works w/ NASISDomainsAsFactor(TRUE)", { + NASISDomainsAsFactor(TRUE) + x <- data.frame(texcl = 1:10) + expect_equal(uncode(x)$texcl, structure(1:10, .Label = c("cos", "s", "fs", "vfs", "lcos", "ls", + "lfs", "lvfs", "cosl", "sl", "fsl", "vfsl", "l", "sil", "si", + "scl", "cl", "sicl", "sc", "sic", "c"), class = "factor")) + NASISDomainsAsFactor(FALSE) +}) + +test_that("code() works", { + x <- data.frame(texcl = c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) + expect_equal(code(x)$texcl, 1:10) +}) + +test_that("code() works w/ NASISDomainsAsFactor(TRUE)", { + NASISDomainsAsFactor(TRUE) + x <- data.frame(texcl = c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) + expect_equal(code(x)$texcl, 1:10) + NASISDomainsAsFactor(FALSE) +}) From 2db5475ef9435ba79766c3021fd8380fc7ca40aa Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 29 Jun 2022 17:55:38 -0700 Subject: [PATCH 5/9] Add `NASISChoiceList()` --- NAMESPACE | 1 + R/uncode.R | 76 ++++++++++++++++++++++++++++++++++++ man/NASISChoiceList.Rd | 60 ++++++++++++++++++++++++++++ tests/testthat/test-uncode.R | 20 ++++++++++ 4 files changed, 157 insertions(+) create mode 100644 man/NASISChoiceList.Rd diff --git a/NAMESPACE b/NAMESPACE index 563b9137..2d00b433 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(ISSR800.wcs) export(KSSL_VG_model) export(NASIS) +export(NASISChoiceList) export(NASISDomainsAsFactor) export(OSDquery) export(ROSETTA) diff --git a/R/uncode.R b/R/uncode.R index 09c4c9b5..c04be2dd 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -261,6 +261,82 @@ get_NASIS_column_metadata <- function(x, mds } +#' Work with NASIS Choice Lists +#' +#' Create (ordered) factors and interchange between choice names, values and labels for lists of input vectors. +#' +#' @param x A named list of vectors to use as input for NASIS Choice List lookup +#' @param colnames vector of values of the column specified by `what`. E.g. `colnames="texcl"` for `what="ColumnPhysicalName"`. Default: `names(x)` (if x is named) +#' @param what passed to `get_NASIS_column_metadata()`; Column to match `x` against. Default `"ColumnPhysicalName"`; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` +#' @param choice one of: `"ChoiceName"`, `"ChoiceValue"`, or `"ChoiceLabel"` +#' @param obsolete Include "obsolete" choices? Default: `FALSE` +#' @param factor Convert result to factor? Default: `TRUE` +#' @param droplevels Drop unused factor levels? Default: `TRUE` (used only when `factor=TRUE`) +#' @param ordered Should the result be an ordered factor? Default: `TRUE` (use _only_ if `DomainRanked` is true for all choices) +#' @param simplify Should list result with length 1 be reduced to a single vector? Default: `TRUE` +#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: NULL +#' @return A list of "choices" based on the input `x` that have been converted to a consistent target set of levels (specified by `choice`) via NASIS 7 metadata. +#' +#' When `factor=TRUE` the result is a factor, possibly ordered when `ordered=TRUE` and the target domain is a "ranked" domain (i.e. `ChoiceSequence` has logical meaning). +#' +#' When `factor=FALSE` the result is a character or numeric vector. Numeric vectors are always returned when `choice` is `"ChoiceValue"`. +#' +#' @export +#' +#' @examples +#' +#' NASISChoiceList(1:3, "texcl") +#' +#' NASISChoiceList(1:3, "pondfreqcl") +#' +#' NASISChoiceList("Clay loam", "texcl", choice = "ChoiceValue") +#' +#' NASISChoiceList("Silty clay loam", "texcl", choice = "ChoiceName") +NASISChoiceList <- function(x, + colnames = names(x), + what = "ColumnPhysicalName", + choice = c("ChoiceName", "ChoiceValue", "ChoiceLabel"), + obsolete = FALSE, + factor = TRUE, + droplevels = FALSE, + ordered = TRUE, + simplify = TRUE, + dsn = NULL) { + choice <- match.arg(choice, choices = c("ChoiceName", "ChoiceValue", "ChoiceLabel")) + if (!is.list(x)) { + n <- colnames + x <- list(x) + if (length(n) == length(x)) { + names(x) <- n + } + } + res <- lapply(names(x), function(xx) { + y <- get_NASIS_column_metadata(xx, what = what, dsn = dsn) + if (!obsolete) { + y <- y[y$ChoiceObsolete == 0, ] + } + idx <- na.omit(as.numeric(apply(do.call('cbind', lapply(y[c("ChoiceValue", "ChoiceName", "ChoiceLabel")], function(xxx) match(x[[xx]], xxx))), MARGIN = 1, \(xxx) as.numeric(na.omit(xxx))))) + yy <- y[idx,] + if (choice != "ChoiceValue" && factor) { + f <- factor(yy[[choice]], + levels = y[[choice]], + ordered = ordered && all(yy$DomainRanked)) + if (droplevels) { + return(droplevels(x)) + } else { + return(f) + } + } else { + return(yy[[choice]]) + } + }) + + if (simplify && length(res) == 1) { + return(res[[1]]) + } + res +} + #' @keywords internal #' @noRd .get_NASIS_metadata <- function(dsn = NULL) { diff --git a/man/NASISChoiceList.Rd b/man/NASISChoiceList.Rd new file mode 100644 index 00000000..f37c2574 --- /dev/null +++ b/man/NASISChoiceList.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/uncode.R +\name{NASISChoiceList} +\alias{NASISChoiceList} +\title{Work with NASIS Choice Lists} +\usage{ +NASISChoiceList( + x, + colnames = names(x), + what = "ColumnPhysicalName", + choice = c("ChoiceName", "ChoiceValue", "ChoiceLabel"), + obsolete = FALSE, + factor = TRUE, + droplevels = FALSE, + ordered = TRUE, + simplify = TRUE, + dsn = NULL +) +} +\arguments{ +\item{x}{A named list of vectors to use as input for NASIS Choice List lookup} + +\item{colnames}{vector of values of the column specified by \code{what}. E.g. \code{colnames="texcl"} for \code{what="ColumnPhysicalName"}. Default: \code{names(x)} (if x is named)} + +\item{what}{passed to \code{get_NASIS_column_metadata()}; Column to match \code{x} against. Default \code{"ColumnPhysicalName"}; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} + +\item{choice}{one of: \code{"ChoiceName"}, \code{"ChoiceValue"}, or \code{"ChoiceLabel"}} + +\item{obsolete}{Include "obsolete" choices? Default: \code{FALSE}} + +\item{factor}{Convert result to factor? Default: \code{TRUE}} + +\item{droplevels}{Drop unused factor levels? Default: \code{TRUE} (used only when \code{factor=TRUE})} + +\item{ordered}{Should the result be an ordered factor? Default: \code{TRUE} (use \emph{only} if \code{DomainRanked} is true for all choices)} + +\item{simplify}{Should list result with length 1 be reduced to a single vector? Default: \code{TRUE}} + +\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: NULL} +} +\value{ +A list of "choices" based on the input \code{x} that have been converted to a consistent target set of levels (specified by \code{choice}) via NASIS 7 metadata. + +When \code{factor=TRUE} the result is a factor, possibly ordered when \code{ordered=TRUE} and the target domain is a "ranked" domain (i.e. \code{ChoiceSequence} has logical meaning). + +When \code{factor=FALSE} the result is a character or numeric vector. Numeric vectors are always returned when \code{choice} is \code{"ChoiceValue"}. +} +\description{ +Create (ordered) factors and interchange between choice names, values and labels for lists of input vectors. +} +\examples{ + +NASISChoiceList(1:3, "texcl") + +NASISChoiceList(1:3, "pondfreqcl") + +NASISChoiceList("Clay loam", "texcl", choice = "ChoiceValue") + +NASISChoiceList("Silty clay loam", "texcl", choice = "ChoiceName") +} diff --git a/tests/testthat/test-uncode.R b/tests/testthat/test-uncode.R index bf2c5d85..9259120d 100644 --- a/tests/testthat/test-uncode.R +++ b/tests/testthat/test-uncode.R @@ -23,3 +23,23 @@ test_that("code() works w/ NASISDomainsAsFactor(TRUE)", { expect_equal(code(x)$texcl, 1:10) NASISDomainsAsFactor(FALSE) }) + +test_that("NASISChoiceList() works", { + x <- NASISChoiceList(1:3, colnames = "texcl") + expect_equal(x, structure(c(3L, 12L, 5L), .Label = c("c", "cl", "cos", "cosl", + "fs", "fsl", "l", "lcos", "lfs", "ls", "lvfs", "s", "sc", "scl", + "si", "sic", "sicl", "sil", "sl", "vfs", "vfsl"), class = "factor")) + + x <- NASISChoiceList(1:3, colnames = "pondfreqcl", factor = FALSE) + expect_equal(x, c('none', 'rare', 'occasional')) + + # convert a label to value + x <- NASISChoiceList("Clay loam", colnames = "texcl", choice = "ChoiceValue") + expect_equal(x, 17L) + + # ordered factor including obsolete choices + x <- NASISChoiceList("common", colnames = "flodfreqcl", choice = "ChoiceName", obsolete = TRUE) + expect_equal(x, structure(5L, .Label = c("none", "very rare", "rare", "occasional", + "common", "frequent", "very frequent"), + class = c("ordered", "factor"))) +}) From 784f65f6b0cb5b7b8b817b53224988f4eafb8415 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Thu, 30 Jun 2022 16:35:22 -0700 Subject: [PATCH 6/9] Update local_NASIS_defined --- R/openNASISchannel.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/openNASISchannel.R b/R/openNASISchannel.R index e40a72cd..63701e5a 100644 --- a/R/openNASISchannel.R +++ b/R/openNASISchannel.R @@ -65,8 +65,9 @@ local_NASIS_defined <- function(dsn = NULL) { if (is.null(dsn)) { # assuming that default connection uses ODBC - if (!requireNamespace("odbc")) - stop("package `odbc` is required ", call. = FALSE) + if (!requireNamespace("odbc", quietly = TRUE)) { + return(FALSE) + } if ('nasis_local' %in% odbc::odbcListDataSources()$name) { return(TRUE) @@ -75,9 +76,9 @@ local_NASIS_defined <- function(dsn = NULL) { } } else { - if (!requireNamespace("RSQLite")) - stop("package `RSQLite` is required", call. = FALSE) - + if (!requireNamespace("RSQLite", quietly = TRUE)) { + stop("package `RSQLite` is required to use `dsn` argument", call. = FALSE) + } return(RSQLite::dbCanConnect(RSQLite::SQLite(), dsn, extended_types = TRUE)) } } From cf89808e01a76c189543ab7331b7d0d04bbe1f52 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Thu, 30 Jun 2022 16:56:15 -0700 Subject: [PATCH 7/9] Docs --- R/openNASISchannel.R | 3 ++- man/local_NASIS_defined.Rd | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/openNASISchannel.R b/R/openNASISchannel.R index 63701e5a..9dad91bf 100644 --- a/R/openNASISchannel.R +++ b/R/openNASISchannel.R @@ -44,8 +44,9 @@ #' Check for presence of \code{nasis_local} ODBC data source #' -#' Check for presence of \code{nasis_local} ODBC data source +#' Check for presence of a NASIS data source. This function _always_ returns `FALSE` when the `odbc` package is not available (regardless of whether you have an ODBC data source properly set up). #' +#' If `dsn` is specified, it is assumed to refer to a SQLite data source. The result will be `TRUE` or `FALSE` depending on the result of `RSQLite::dbCanConnect()`. #' #' @param dsn Optional: path to local SQLite database containing NASIS #' table structure; default: NULL diff --git a/man/local_NASIS_defined.Rd b/man/local_NASIS_defined.Rd index ce80c0d4..9094f585 100644 --- a/man/local_NASIS_defined.Rd +++ b/man/local_NASIS_defined.Rd @@ -14,7 +14,10 @@ table structure; default: NULL} logical } \description{ -Check for presence of \code{nasis_local} ODBC data source +Check for presence of a NASIS data source. This function \emph{always} returns \code{FALSE} when the \code{odbc} package is not available (regardless of whether you have an ODBC data source properly set up). +} +\details{ +If \code{dsn} is specified, it is assumed to refer to a SQLite data source. The result will be \code{TRUE} or \code{FALSE} depending on the result of \code{RSQLite::dbCanConnect()}. } \examples{ From 681914f0976bf0d5790eb529bfb3f66f3a8db494 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Thu, 30 Jun 2022 17:01:26 -0700 Subject: [PATCH 8/9] local_NASIS_defined: add support for DBIConnection `dsn` --- NAMESPACE | 1 + R/openNASISchannel.R | 23 ++++++++++++++++------- man/local_NASIS_defined.Rd | 7 ++++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2d00b433..10f16187 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -152,6 +152,7 @@ export(uncode) export(waterDayYear) importClassesFrom(aqp,SoilProfileCollection) importFrom(DBI,dbConnect) +importFrom(DBI,dbExistsTable) importFrom(DBI,dbFetch) importFrom(DBI,dbGetQuery) importFrom(DBI,dbSendQuery) diff --git a/R/openNASISchannel.R b/R/openNASISchannel.R index 9dad91bf..4259cefe 100644 --- a/R/openNASISchannel.R +++ b/R/openNASISchannel.R @@ -46,10 +46,11 @@ #' #' Check for presence of a NASIS data source. This function _always_ returns `FALSE` when the `odbc` package is not available (regardless of whether you have an ODBC data source properly set up). #' -#' If `dsn` is specified, it is assumed to refer to a SQLite data source. The result will be `TRUE` or `FALSE` depending on the result of `RSQLite::dbCanConnect()`. +#' If `dsn` is specified as a character vector it is assumed to refer to a SQLite data source. The result will be `TRUE` or `FALSE` depending on the result of `RSQLite::dbCanConnect()`. #' -#' @param dsn Optional: path to local SQLite database containing NASIS -#' table structure; default: NULL +#' If `dsn` is specified as a `DBIConnection` the function returns the value of `DBI::dbExistsTable("MetadataDomainMaster")` +#' +#' @param dsn Optional: path to local SQLite database, or a DBIConnection, containing NASIS table structure; default: NULL #' @return logical #' @examples #' @@ -60,7 +61,8 @@ #' message('could not find `nasis_local` ODBC data source') #' } #' -#' @export local_NASIS_defined +#' @export +#' @importFrom DBI dbExistsTable local_NASIS_defined <- function(dsn = NULL) { if (is.null(dsn)) { @@ -70,16 +72,23 @@ local_NASIS_defined <- function(dsn = NULL) { return(FALSE) } - if ('nasis_local' %in% odbc::odbcListDataSources()$name) { + if ("nasis_local" %in% odbc::odbcListDataSources()$name) { return(TRUE) } else { return(FALSE) } - } else { + } else if (inherits(dsn, "DBIConnection")) { + + # check for metadata domain table as indicator of dsn NASIS origins + return(DBI::dbExistsTable(dsn, "MetadataDomainMaster") || + DBI::dbExistsTable(dsn, "metadatadomainmaster") ) + + } else if (is.character(dsn)) { if (!requireNamespace("RSQLite", quietly = TRUE)) { - stop("package `RSQLite` is required to use `dsn` argument", call. = FALSE) + stop("package `RSQLite` is required to use character path as `dsn` argument", call. = FALSE) } return(RSQLite::dbCanConnect(RSQLite::SQLite(), dsn, extended_types = TRUE)) } + FALSE } diff --git a/man/local_NASIS_defined.Rd b/man/local_NASIS_defined.Rd index 9094f585..e4e3e118 100644 --- a/man/local_NASIS_defined.Rd +++ b/man/local_NASIS_defined.Rd @@ -7,8 +7,7 @@ local_NASIS_defined(dsn = NULL) } \arguments{ -\item{dsn}{Optional: path to local SQLite database containing NASIS -table structure; default: NULL} +\item{dsn}{Optional: path to local SQLite database, or a DBIConnection, containing NASIS table structure; default: NULL} } \value{ logical @@ -17,7 +16,9 @@ logical Check for presence of a NASIS data source. This function \emph{always} returns \code{FALSE} when the \code{odbc} package is not available (regardless of whether you have an ODBC data source properly set up). } \details{ -If \code{dsn} is specified, it is assumed to refer to a SQLite data source. The result will be \code{TRUE} or \code{FALSE} depending on the result of \code{RSQLite::dbCanConnect()}. +If \code{dsn} is specified as a character vector it is assumed to refer to a SQLite data source. The result will be \code{TRUE} or \code{FALSE} depending on the result of \code{RSQLite::dbCanConnect()}. + +If \code{dsn} is specified as a \code{DBIConnection} the function returns the value of \code{DBI::dbExistsTable("MetadataDomainMaster")} } \examples{ From f0139d7dfac470d25d44840d71060c1a327668e2 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Fri, 1 Jul 2022 10:42:54 -0700 Subject: [PATCH 9/9] More tests --- tests/testthat/test-uncode.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-uncode.R b/tests/testthat/test-uncode.R index 9259120d..74640dff 100644 --- a/tests/testthat/test-uncode.R +++ b/tests/testthat/test-uncode.R @@ -15,6 +15,10 @@ test_that("uncode() works w/ NASISDomainsAsFactor(TRUE)", { test_that("code() works", { x <- data.frame(texcl = c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) expect_equal(code(x)$texcl, 1:10) + + # heterogeneous names and labels + x <- data.frame(texcl = c("cos", "Sand", "fs", "vfs", "lcos", "Loamy sand", "lfs", "lvfs", "cosl", "Sandy loam")) + expect_equal(code(x)$texcl, 1:10) }) test_that("code() works w/ NASISDomainsAsFactor(TRUE)", { @@ -42,4 +46,10 @@ test_that("NASISChoiceList() works", { expect_equal(x, structure(5L, .Label = c("none", "very rare", "rare", "occasional", "common", "frequent", "very frequent"), class = c("ordered", "factor"))) + + # obsolete value, ordered factor excluding obsolete choices + x <- NASISChoiceList("common", colnames = "flodfreqcl", choice = "ChoiceName") + expect_equal(x, structure(integer(0), levels = c("none", "very rare", "rare", + "occasional", "frequent", "very frequent"), + class = c("ordered", "factor"))) })