diff --git a/NAMESPACE b/NAMESPACE index 88dc867e..10f16187 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(ISSR800.wcs) export(KSSL_VG_model) export(NASIS) +export(NASISChoiceList) export(NASISDomainsAsFactor) export(OSDquery) export(ROSETTA) @@ -41,7 +42,9 @@ export(format_SQL_in_statement) export(getHzErrorsNASIS) export(getHzErrorsPedonPC) export(get_EDIT_ecoclass_by_geoUnit) +export(get_NASIS_column_metadata) export(get_NASIS_fkey_by_name) +export(get_NASIS_metadata) export(get_NASIS_pkey_by_name) export(get_NASIS_pkeyref_by_name) export(get_NASIS_table_key_by_name) @@ -149,6 +152,7 @@ export(uncode) export(waterDayYear) importClassesFrom(aqp,SoilProfileCollection) importFrom(DBI,dbConnect) +importFrom(DBI,dbExistsTable) importFrom(DBI,dbFetch) importFrom(DBI,dbGetQuery) importFrom(DBI,dbSendQuery) diff --git a/R/openNASISchannel.R b/R/openNASISchannel.R index e40a72cd..4259cefe 100644 --- a/R/openNASISchannel.R +++ b/R/openNASISchannel.R @@ -44,11 +44,13 @@ #' Check for presence of \code{nasis_local} ODBC data source #' -#' Check for presence of \code{nasis_local} ODBC data source +#' Check for presence of a NASIS data source. This function _always_ returns `FALSE` when the `odbc` package is not available (regardless of whether you have an ODBC data source properly set up). +#' +#' If `dsn` is specified as a character vector it is assumed to refer to a SQLite data source. The result will be `TRUE` or `FALSE` depending on the result of `RSQLite::dbCanConnect()`. #' +#' If `dsn` is specified as a `DBIConnection` the function returns the value of `DBI::dbExistsTable("MetadataDomainMaster")` #' -#' @param dsn Optional: path to local SQLite database containing NASIS -#' table structure; default: NULL +#' @param dsn Optional: path to local SQLite database, or a DBIConnection, containing NASIS table structure; default: NULL #' @return logical #' @examples #' @@ -59,25 +61,34 @@ #' message('could not find `nasis_local` ODBC data source') #' } #' -#' @export local_NASIS_defined +#' @export +#' @importFrom DBI dbExistsTable local_NASIS_defined <- function(dsn = NULL) { if (is.null(dsn)) { # assuming that default connection uses ODBC - if (!requireNamespace("odbc")) - stop("package `odbc` is required ", call. = FALSE) + if (!requireNamespace("odbc", quietly = TRUE)) { + return(FALSE) + } - if ('nasis_local' %in% odbc::odbcListDataSources()$name) { + if ("nasis_local" %in% odbc::odbcListDataSources()$name) { return(TRUE) } else { return(FALSE) } - } else { + } else if (inherits(dsn, "DBIConnection")) { - if (!requireNamespace("RSQLite")) - stop("package `RSQLite` is required", call. = FALSE) + # check for metadata domain table as indicator of dsn NASIS origins + return(DBI::dbExistsTable(dsn, "MetadataDomainMaster") || + DBI::dbExistsTable(dsn, "metadatadomainmaster") ) + } else if (is.character(dsn)) { + + if (!requireNamespace("RSQLite", quietly = TRUE)) { + stop("package `RSQLite` is required to use character path as `dsn` argument", call. = FALSE) + } return(RSQLite::dbCanConnect(RSQLite::SQLite(), dsn, extended_types = TRUE)) } + FALSE } diff --git a/R/uncode.R b/R/uncode.R index 656eb0f1..c04be2dd 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -77,20 +77,7 @@ uncode <- function(df, NASISDomainsAsFactor(stringsAsFactors) } - # load current metadata table - if (local_NASIS_defined(dsn = dsn)) { - - # cache NASIS metadata in soilDB.env within an R session - if (!exists("NASIS.metadata", envir = soilDB.env)) { - metadata <- .get_NASIS_metadata(dsn = dsn) - assign('NASIS.metadata', value = metadata, envir = soilDB.env) - } else { - metadata <- get("NASIS.metadata", envir = soilDB.env) - } - - } else { - load(system.file("data/metadata.rda", package = "soilDB")[1]) - } + metadata <- get_NASIS_metadata(dsn = dsn) # unique set of possible columns that will need replacement metadata_col <- names(metadata)[grep("ColumnPhysicalName", names(metadata), ignore.case = TRUE)] @@ -111,16 +98,37 @@ uncode <- function(df, if (!invert) { # replace values with ChoiceName, try filling NA with replace based on ChoiceLabel - nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]]) - lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]]) - nc[is.na(nc)] <- lc[is.na(nc)] - df[[i]] <- nc - } else { + # do not explicitly set `levels` if none of the values in value_col (numeric) are present + if (any(df[[i]] %in% sub[[value_col]])) { + nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]]) + lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]]) + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } + df[[i]] <- nc + } else { + nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[name_col]]) + lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[label_col]]) + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } + } + } else if (invert) { # replace values with ChoiceName, try filling NA with replace based on ChoiceLabel nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[value_col]]) lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[value_col]]) - nc[is.na(nc)] <- lc[is.na(nc)] - df[[i]] <- nc + if (all(is.na(nc))) { + df[[i]] <- lc + } else { + nc[is.na(nc)] <- lc[is.na(nc)] + df[[i]] <- nc + } } } @@ -130,48 +138,26 @@ uncode <- function(df, df <- droplevels(df, except = idx) } - # convert factors to strings, check soilDB option first - if ((length(stringsAsFactors) > 0 && !stringsAsFactors) || - !getOption("soilDB.NASIS.DomainsAsFactor", default = FALSE)) { + # convert factors to strings/numeric, check soilDB option first + if (invert || (length(stringsAsFactors) > 0 && !stringsAsFactors) || !NASISDomainsAsFactor()) { idx <- unlist(lapply(df, is.factor)) - df[idx] <- lapply(df[idx], as.character) + df[idx] <- lapply(df[idx], function(x) { + type.convert(x, as.is = TRUE) + }) } return(df) } -.get_NASIS_metadata <- function(dsn = NULL) { - - q <- "SELECT mdd.DomainID, DomainName, DomainRanked, DisplayLabel, - ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, - ColumnPhysicalName, ColumnLogicalName - FROM MetadataDomainDetail mdd - INNER JOIN MetadataDomainMaster mdm ON mdm.DomainID = mdd.DomainID - INNER JOIN (SELECT MIN(DomainID) DomainID, MIN(ColumnPhysicalName) ColumnPhysicalName, MIN(ColumnLogicalName) ColumnLogicalName - FROM MetadataTableColumn GROUP BY DomainID, ColumnPhysicalName) mtc ON mtc.DomainID = mdd.DomainID - ORDER BY mdd.DomainID, ColumnPhysicalName, ChoiceValue;" - - channel <- dbConnectNASIS(dsn) - - if (inherits(channel, 'try-error')) - return(data.frame()) - - # exec query - d <- dbQueryNASIS(channel, q) - - # done - return(d) -} - # convenient, inverted version of uncode() #' @export #' @rdname uncode code <- function(df, - db = "NASIS", + db = NULL, droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) { - res <- uncode(df, invert = TRUE, db = db, droplevels = droplevels, stringsAsFactors = stringsAsFactors, dsn = dsn) + res <- uncode(df, invert = TRUE, droplevels = droplevels, stringsAsFactors = stringsAsFactors, dsn = dsn) return(res) } @@ -195,3 +181,166 @@ NASISDomainsAsFactor <- function(x = NULL) { invisible(getOption("soilDB.NASIS.DomainsAsFactor", default = FALSE)) } +#' Get NASIS Metadata (Domain, Column and Choice Lists) +#' +#' Retrieve a table containing domain and column names with choice list labels/names/sequences/values from the NASIS 7 metadata tables. +#' +#' These data are derived from the MetadataDomainDetail, MetadataDomainMaster, and MetadataTableColumn tables and help with mapping between values stored in the NASIS database and human-readable values. The human-readable values align with the values returned in public facing interfaces such as SSURGO via Soil Data Access and NASIS Web Reports. The data in these tables can also be used to create _ordered_ factors where options for levels of a particular data element follow a logical `ChoiceSequence`. +#' +#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` +#' +#' @details If a local NASIS instance is set up, and this is the first time `get_NASIS_metadata()` has been called, the metadata will be obtained from the NASIS local database. Subsequent runs in the same session will use a copy of the data object `NASIS.metadata` cached in `soilDB.env`. +#' +#' For users without a local NASIS instance, a cached copy of the NASIS metadata are used `(data/metadata.rda)`. +#' +#' See `?soilDB::metadata` for additional details. +#' +#' @return a `data.frame` containing DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, ColumnPhysicalName, ColumnLogicalName +#' @export +#' +#' @examples +#' get_NASIS_metadata() +get_NASIS_metadata <- function(dsn = NULL) { + + metadata <- NULL + + .doQuery <- function(dsn){ + q <- "SELECT mdd.DomainID, DomainName, DomainRanked, DisplayLabel, + ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, + ColumnPhysicalName, ColumnLogicalName + FROM MetadataDomainDetail mdd + INNER JOIN MetadataDomainMaster mdm ON mdm.DomainID = mdd.DomainID + INNER JOIN (SELECT MIN(DomainID) DomainID, MIN(ColumnPhysicalName) ColumnPhysicalName, MIN(ColumnLogicalName) ColumnLogicalName + FROM MetadataTableColumn GROUP BY DomainID, ColumnPhysicalName) mtc ON mtc.DomainID = mdd.DomainID + ORDER BY mdd.DomainID, ColumnPhysicalName, ChoiceValue;" + + channel <- dbConnectNASIS(dsn) + + if (inherits(channel, 'try-error')) + return(data.frame()) + + # exec query + dbQueryNASIS(channel, q) + } + + # load current metadata table + if (local_NASIS_defined(dsn = dsn)) { + + # cache NASIS metadata in soilDB.env within an R session + if (!exists("NASIS.metadata", envir = soilDB.env)) { + metadata <- .doQuery(dsn = dsn) + assign('NASIS.metadata', value = metadata, envir = soilDB.env) + } else { + metadata <- get("NASIS.metadata", envir = soilDB.env) + } + + } + + if (is.null(metadata) || (is.data.frame(metadata) && nrow(metadata) == 0)) { + load(system.file("data/metadata.rda", package = "soilDB")[1]) + } + + metadata +} + +#' Get NASIS metadata entries for specific domains or choices +#' +#' @param x character vector to match in NASIS metadata +#' @param what Column to match `x` against. Default `"ColumnPhysicalName"`; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` +#' @return a `data.frame` containing selected NASIS metadata sorted first on `DomainID` and then on `ChoiceSequence` +#' @export +#' @rdname get_NASIS_metadata +#' @examples +#' get_NASIS_column_metadata("texcl") +get_NASIS_column_metadata <- function(x, + what = "ColumnPhysicalName", + dsn = NULL) { + metadata <- get_NASIS_metadata(dsn = dsn) + mds <- metadata[metadata[[what]] %in% x, ] + mds <- mds[order(mds$DomainID, mds$ChoiceSequence), ] + mds +} + +#' Work with NASIS Choice Lists +#' +#' Create (ordered) factors and interchange between choice names, values and labels for lists of input vectors. +#' +#' @param x A named list of vectors to use as input for NASIS Choice List lookup +#' @param colnames vector of values of the column specified by `what`. E.g. `colnames="texcl"` for `what="ColumnPhysicalName"`. Default: `names(x)` (if x is named) +#' @param what passed to `get_NASIS_column_metadata()`; Column to match `x` against. Default `"ColumnPhysicalName"`; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"` +#' @param choice one of: `"ChoiceName"`, `"ChoiceValue"`, or `"ChoiceLabel"` +#' @param obsolete Include "obsolete" choices? Default: `FALSE` +#' @param factor Convert result to factor? Default: `TRUE` +#' @param droplevels Drop unused factor levels? Default: `TRUE` (used only when `factor=TRUE`) +#' @param ordered Should the result be an ordered factor? Default: `TRUE` (use _only_ if `DomainRanked` is true for all choices) +#' @param simplify Should list result with length 1 be reduced to a single vector? Default: `TRUE` +#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: NULL +#' @return A list of "choices" based on the input `x` that have been converted to a consistent target set of levels (specified by `choice`) via NASIS 7 metadata. +#' +#' When `factor=TRUE` the result is a factor, possibly ordered when `ordered=TRUE` and the target domain is a "ranked" domain (i.e. `ChoiceSequence` has logical meaning). +#' +#' When `factor=FALSE` the result is a character or numeric vector. Numeric vectors are always returned when `choice` is `"ChoiceValue"`. +#' +#' @export +#' +#' @examples +#' +#' NASISChoiceList(1:3, "texcl") +#' +#' NASISChoiceList(1:3, "pondfreqcl") +#' +#' NASISChoiceList("Clay loam", "texcl", choice = "ChoiceValue") +#' +#' NASISChoiceList("Silty clay loam", "texcl", choice = "ChoiceName") +NASISChoiceList <- function(x, + colnames = names(x), + what = "ColumnPhysicalName", + choice = c("ChoiceName", "ChoiceValue", "ChoiceLabel"), + obsolete = FALSE, + factor = TRUE, + droplevels = FALSE, + ordered = TRUE, + simplify = TRUE, + dsn = NULL) { + choice <- match.arg(choice, choices = c("ChoiceName", "ChoiceValue", "ChoiceLabel")) + if (!is.list(x)) { + n <- colnames + x <- list(x) + if (length(n) == length(x)) { + names(x) <- n + } + } + res <- lapply(names(x), function(xx) { + y <- get_NASIS_column_metadata(xx, what = what, dsn = dsn) + if (!obsolete) { + y <- y[y$ChoiceObsolete == 0, ] + } + idx <- na.omit(as.numeric(apply(do.call('cbind', lapply(y[c("ChoiceValue", "ChoiceName", "ChoiceLabel")], function(xxx) match(x[[xx]], xxx))), MARGIN = 1, \(xxx) as.numeric(na.omit(xxx))))) + yy <- y[idx,] + if (choice != "ChoiceValue" && factor) { + f <- factor(yy[[choice]], + levels = y[[choice]], + ordered = ordered && all(yy$DomainRanked)) + if (droplevels) { + return(droplevels(x)) + } else { + return(f) + } + } else { + return(yy[[choice]]) + } + }) + + if (simplify && length(res) == 1) { + return(res[[1]]) + } + res +} + +#' @keywords internal +#' @noRd +.get_NASIS_metadata <- function(dsn = NULL) { + # for backward compatibility or anyone who is using the .get method in the wild + .Deprecated("get_NASIS_metadata") + get_NASIS_metadata(dsn) +} diff --git a/R/utils.R b/R/utils.R index 982821d1..fd3a7631 100644 --- a/R/utils.R +++ b/R/utils.R @@ -734,47 +734,48 @@ orig_names <- names(df) # relabel names - names(df) <- gsub("^soimoist", "", names(df)) - old_names <- "stat" - new_names <- "status" - names(df)[names(df) %in% old_names] <- new_names - + # names(df) <- gsub("^soimoist", "", names(df)) + # old_names <- "stat" + # new_names <- "status" + # names(df)[names(df) %in% old_names] <- new_names # setting frequency levels and order + + # NOTE: the next block of code require factor levels be set, regardless of package options # NOTE: the SDA domains for flooding and ponding have different levels, and "Common" is obsolete - # TODO: replace with ordering derived from NASIS ChoiceSequence - flod_lev <- factor(df$flodfreqcl, levels = c("None", "Very rare", "Rare", "Occasional", "Common", "Frequent", "Very frequent")) - pond_lev <- factor(df$pondfreqcl, levels = c("None", "Rare", "Occasional", "Common", "Frequent")) + # .:. ordering implied in get_NASIS_column_metadata result from NASIS ChoiceSequence + flod_lev <- factor(df$flodfreqcl, levels = get_NASIS_column_metadata("flodfreqcl")$ChoiceLabel) + pond_lev <- factor(df$pondfreqcl, levels = get_NASIS_column_metadata("pondfreqcl")$ChoiceLabel) + mois_lev <- factor(df$soimoiststat, levels = get_NASIS_column_metadata("soimoiststat")$ChoiceLabel) + # impute NA freqcl values, default = "not populated" - if (impute == TRUE) { + if (impute) { missing <- "Not populated" - lev_flodfreqcl <- c(missing, levels(df$flodfreqcl)) - lev_pondfreqcl <- c(missing, levels(df$pondfreqcl)) - lev_status <- c(missing, levels(df$status)) - - df <- within(df, { - # replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL - dept_r[is.na(dept_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201 - depb_r[is.na(depb_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201 - - # replace NULL L and H depths with the RV - dept_l = ifelse(is.na(dept_l), dept_r, dept_l) - dept_h = ifelse(is.na(dept_h), dept_r, dept_h) - - depb_l = ifelse(is.na(depb_l), depb_r, depb_l) - depb_h = ifelse(is.na(depb_h), depb_r, depb_h) - - # replace NULL freqcl with "Not_Populated" - status = factor(levels(status)[status], levels = lev_status) - flodfreqcl = factor(levels(flodfreqcl)[flodfreqcl], levels = lev_flodfreqcl) - pondfreqcl = factor(levels(pondfreqcl)[pondfreqcl], levels = lev_flodfreqcl) - - status[is.na(status)] <- missing - flodfreqcl[is.na(flodfreqcl)] <- missing - pondfreqcl[is.na(pondfreqcl)] <- missing - }) + lev_flodfreqcl <- c(missing, levels(flod_lev)) + lev_pondfreqcl <- c(missing, levels(pond_lev)) + lev_status <- c(missing, levels(mois_lev)) + + # replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL + df$soimoistdept_r[is.na(df$soimoistdept_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201 + df$soimoistdepb_r[is.na(df$soimoistdepb_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201 + + # replace NULL L and H depths with the RV + df$soimoistdept_l <- ifelse(is.na(df$soimoistdept_l), df$soimoistdept_r, df$soimoistdept_l) + df$soimoistdept_h <- ifelse(is.na(df$soimoistdept_h), df$soimoistdept_r, df$soimoistdept_h) + df$soimoistdepb_l <- ifelse(is.na(df$soimoistdepb_l), df$soimoistdepb_r, df$soimoistdepb_l) + df$soimoistdepb_h <- ifelse(is.na(df$soimoistdepb_h), df$soimoistdepb_r, df$soimoistdepb_h) + + # relevel factors with "Not populated" as first level + df$soimoiststat <- factor(as.character(df$soimoiststat), levels = lev_status) + df$flodfreqcl <- factor(as.character(df$flodfreqcl), levels = lev_flodfreqcl) + df$pondfreqcl <- factor(as.character(df$pondfreqcl), levels = lev_flodfreqcl) + + # replace NULL moist state and frequency class with "Not populated" + df$soimoiststat[is.na(df$soimoiststat)] <- missing + df$flodfreqcl[is.na(df$flodfreqcl)] <- missing + df$pondfreqcl[is.na(df$pondfreqcl)] <- missing } # convert factors to strings diff --git a/man/NASISChoiceList.Rd b/man/NASISChoiceList.Rd new file mode 100644 index 00000000..f37c2574 --- /dev/null +++ b/man/NASISChoiceList.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/uncode.R +\name{NASISChoiceList} +\alias{NASISChoiceList} +\title{Work with NASIS Choice Lists} +\usage{ +NASISChoiceList( + x, + colnames = names(x), + what = "ColumnPhysicalName", + choice = c("ChoiceName", "ChoiceValue", "ChoiceLabel"), + obsolete = FALSE, + factor = TRUE, + droplevels = FALSE, + ordered = TRUE, + simplify = TRUE, + dsn = NULL +) +} +\arguments{ +\item{x}{A named list of vectors to use as input for NASIS Choice List lookup} + +\item{colnames}{vector of values of the column specified by \code{what}. E.g. \code{colnames="texcl"} for \code{what="ColumnPhysicalName"}. Default: \code{names(x)} (if x is named)} + +\item{what}{passed to \code{get_NASIS_column_metadata()}; Column to match \code{x} against. Default \code{"ColumnPhysicalName"}; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} + +\item{choice}{one of: \code{"ChoiceName"}, \code{"ChoiceValue"}, or \code{"ChoiceLabel"}} + +\item{obsolete}{Include "obsolete" choices? Default: \code{FALSE}} + +\item{factor}{Convert result to factor? Default: \code{TRUE}} + +\item{droplevels}{Drop unused factor levels? Default: \code{TRUE} (used only when \code{factor=TRUE})} + +\item{ordered}{Should the result be an ordered factor? Default: \code{TRUE} (use \emph{only} if \code{DomainRanked} is true for all choices)} + +\item{simplify}{Should list result with length 1 be reduced to a single vector? Default: \code{TRUE}} + +\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: NULL} +} +\value{ +A list of "choices" based on the input \code{x} that have been converted to a consistent target set of levels (specified by \code{choice}) via NASIS 7 metadata. + +When \code{factor=TRUE} the result is a factor, possibly ordered when \code{ordered=TRUE} and the target domain is a "ranked" domain (i.e. \code{ChoiceSequence} has logical meaning). + +When \code{factor=FALSE} the result is a character or numeric vector. Numeric vectors are always returned when \code{choice} is \code{"ChoiceValue"}. +} +\description{ +Create (ordered) factors and interchange between choice names, values and labels for lists of input vectors. +} +\examples{ + +NASISChoiceList(1:3, "texcl") + +NASISChoiceList(1:3, "pondfreqcl") + +NASISChoiceList("Clay loam", "texcl", choice = "ChoiceValue") + +NASISChoiceList("Silty clay loam", "texcl", choice = "ChoiceName") +} diff --git a/man/get_NASIS_metadata.Rd b/man/get_NASIS_metadata.Rd new file mode 100644 index 00000000..7ed0b9c9 --- /dev/null +++ b/man/get_NASIS_metadata.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/uncode.R +\name{get_NASIS_metadata} +\alias{get_NASIS_metadata} +\alias{get_NASIS_column_metadata} +\title{Get NASIS Metadata (Domain, Column and Choice Lists)} +\usage{ +get_NASIS_metadata(dsn = NULL) + +get_NASIS_column_metadata(x, what = "ColumnPhysicalName", dsn = NULL) +} +\arguments{ +\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} + +\item{x}{character vector to match in NASIS metadata} + +\item{what}{Column to match \code{x} against. Default \code{"ColumnPhysicalName"}; alternate options include \code{"DomainID"}, \code{"DomainName"}, \code{"DomainRanked"}, \code{"DisplayLabel"}, \code{"ChoiceSequence"}, \code{"ChoiceValue"}, \code{"ChoiceName"}, \code{"ChoiceLabel"}, \code{"ChoiceObsolete"}, \code{"ChoiceDescription"}, \code{"ColumnLogicalName"}} +} +\value{ +a \code{data.frame} containing DomainID, DomainName, DomainRanked, DisplayLabel, ChoiceSequence, ChoiceValue, ChoiceName, ChoiceLabel, ChoiceObsolete, ColumnPhysicalName, ColumnLogicalName + +a \code{data.frame} containing selected NASIS metadata sorted first on \code{DomainID} and then on \code{ChoiceSequence} +} +\description{ +Retrieve a table containing domain and column names with choice list labels/names/sequences/values from the NASIS 7 metadata tables. +} +\details{ +These data are derived from the MetadataDomainDetail, MetadataDomainMaster, and MetadataTableColumn tables and help with mapping between values stored in the NASIS database and human-readable values. The human-readable values align with the values returned in public facing interfaces such as SSURGO via Soil Data Access and NASIS Web Reports. The data in these tables can also be used to create \emph{ordered} factors where options for levels of a particular data element follow a logical \code{ChoiceSequence}. + +If a local NASIS instance is set up, and this is the first time \code{get_NASIS_metadata()} has been called, the metadata will be obtained from the NASIS local database. Subsequent runs in the same session will use a copy of the data object \code{NASIS.metadata} cached in \code{soilDB.env}. + +For users without a local NASIS instance, a cached copy of the NASIS metadata are used \code{(data/metadata.rda)}. + +See \code{?soilDB::metadata} for additional details. +} +\examples{ +get_NASIS_metadata() +get_NASIS_column_metadata("texcl") +} diff --git a/man/local_NASIS_defined.Rd b/man/local_NASIS_defined.Rd index ce80c0d4..e4e3e118 100644 --- a/man/local_NASIS_defined.Rd +++ b/man/local_NASIS_defined.Rd @@ -7,14 +7,18 @@ local_NASIS_defined(dsn = NULL) } \arguments{ -\item{dsn}{Optional: path to local SQLite database containing NASIS -table structure; default: NULL} +\item{dsn}{Optional: path to local SQLite database, or a DBIConnection, containing NASIS table structure; default: NULL} } \value{ logical } \description{ -Check for presence of \code{nasis_local} ODBC data source +Check for presence of a NASIS data source. This function \emph{always} returns \code{FALSE} when the \code{odbc} package is not available (regardless of whether you have an ODBC data source properly set up). +} +\details{ +If \code{dsn} is specified as a character vector it is assumed to refer to a SQLite data source. The result will be \code{TRUE} or \code{FALSE} depending on the result of \code{RSQLite::dbCanConnect()}. + +If \code{dsn} is specified as a \code{DBIConnection} the function returns the value of \code{DBI::dbExistsTable("MetadataDomainMaster")} } \examples{ diff --git a/man/uncode.Rd b/man/uncode.Rd index 164c79e9..09cb50c9 100644 --- a/man/uncode.Rd +++ b/man/uncode.Rd @@ -14,7 +14,7 @@ uncode( dsn = NULL ) -code(df, db = "NASIS", droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) +code(df, db = NULL, droplevels = FALSE, stringsAsFactors = NULL, dsn = NULL) } \arguments{ \item{df}{data.frame} diff --git a/tests/testthat/test-uncode.R b/tests/testthat/test-uncode.R new file mode 100644 index 00000000..74640dff --- /dev/null +++ b/tests/testthat/test-uncode.R @@ -0,0 +1,55 @@ +test_that("uncode() works", { + x <- data.frame(texcl = 1:10) + expect_equal(uncode(x)$texcl, c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) +}) + +test_that("uncode() works w/ NASISDomainsAsFactor(TRUE)", { + NASISDomainsAsFactor(TRUE) + x <- data.frame(texcl = 1:10) + expect_equal(uncode(x)$texcl, structure(1:10, .Label = c("cos", "s", "fs", "vfs", "lcos", "ls", + "lfs", "lvfs", "cosl", "sl", "fsl", "vfsl", "l", "sil", "si", + "scl", "cl", "sicl", "sc", "sic", "c"), class = "factor")) + NASISDomainsAsFactor(FALSE) +}) + +test_that("code() works", { + x <- data.frame(texcl = c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) + expect_equal(code(x)$texcl, 1:10) + + # heterogeneous names and labels + x <- data.frame(texcl = c("cos", "Sand", "fs", "vfs", "lcos", "Loamy sand", "lfs", "lvfs", "cosl", "Sandy loam")) + expect_equal(code(x)$texcl, 1:10) +}) + +test_that("code() works w/ NASISDomainsAsFactor(TRUE)", { + NASISDomainsAsFactor(TRUE) + x <- data.frame(texcl = c("cos", "s", "fs", "vfs", "lcos", "ls", "lfs", "lvfs", "cosl", "sl")) + expect_equal(code(x)$texcl, 1:10) + NASISDomainsAsFactor(FALSE) +}) + +test_that("NASISChoiceList() works", { + x <- NASISChoiceList(1:3, colnames = "texcl") + expect_equal(x, structure(c(3L, 12L, 5L), .Label = c("c", "cl", "cos", "cosl", + "fs", "fsl", "l", "lcos", "lfs", "ls", "lvfs", "s", "sc", "scl", + "si", "sic", "sicl", "sil", "sl", "vfs", "vfsl"), class = "factor")) + + x <- NASISChoiceList(1:3, colnames = "pondfreqcl", factor = FALSE) + expect_equal(x, c('none', 'rare', 'occasional')) + + # convert a label to value + x <- NASISChoiceList("Clay loam", colnames = "texcl", choice = "ChoiceValue") + expect_equal(x, 17L) + + # ordered factor including obsolete choices + x <- NASISChoiceList("common", colnames = "flodfreqcl", choice = "ChoiceName", obsolete = TRUE) + expect_equal(x, structure(5L, .Label = c("none", "very rare", "rare", "occasional", + "common", "frequent", "very frequent"), + class = c("ordered", "factor"))) + + # obsolete value, ordered factor excluding obsolete choices + x <- NASISChoiceList("common", colnames = "flodfreqcl", choice = "ChoiceName") + expect_equal(x, structure(integer(0), levels = c("none", "very rare", "rare", + "occasional", "frequent", "very frequent"), + class = c("ordered", "factor"))) +})