Skip to content

Commit

Permalink
Merge pull request #46 from ncss-tech/parse-taxaabovefam
Browse files Browse the repository at this point in the history
Extend `parse_family()` to work with taxa above family
  • Loading branch information
brownag authored Nov 13, 2023
2 parents 6264a44 + e28e98c commit d04c9b0
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 21 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Title: A System of Soil Classification for Making and Interpreting Soil Surveys
Description: Taxonomic dictionaries, formative element lists, and functions related to the maintenance, development and application of U.S. Soil Taxonomy.
Data and functionality are based on official U.S. Department of Agriculture sources including the latest edition of the Keys to Soil Taxonomy. Descriptions and metadata are obtained from the National Soil Information System or Soil Survey Geographic databases. Other sources are referenced in the data documentation.
Provides tools for understanding and interacting with concepts in the U.S. Soil Taxonomic System. Most of the current utilities are for working with taxonomic concepts at the "higher" taxonomic levels: Order, Suborder, Great Group, and Subgroup.
Version: 0.2.3
Version: 0.2.4
Authors@R: c(person(given="Andrew", family="Brown", email="[email protected]", role = c("aut", "cre")), person(given="Dylan", family="Beaudette", role = c("aut"), email = "[email protected]"))
Maintainer: Andrew Brown <[email protected]>
Depends: R (>= 3.5)
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ importFrom(stringr,fixed)
importFrom(stringr,str_locate)
importFrom(stringr,str_locate_all)
importFrom(utils,browseURL)
importFrom(utils,tail)
importFrom(utils,type.convert)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# SoilTaxonomy 0.2.4
- `taxonTree()` default markup via `special.chars` argument emulates `fs::dir_tree()` output
- `parse_family()` now returns more complete information for taxa above family
- `taxminalogy` column for taxa with strongly contrasting control sections now use `" over "` as the separator between class names.

# SoilTaxonomy 0.2.3 (2023-02-01)
- Fix unintended case-sensitivity of `FormativeElements()`; thanks to Shawn Salley (@swsalley)
- Fix for `extractSMR()` via fix for `FormativeElements()` applied at multiple levels (affects taxa above subgroup level)
Expand Down
37 changes: 22 additions & 15 deletions R/family-classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {
# load local copy of taxon code lookup table
load(system.file("data/ST_unique_list.rda", package = "SoilTaxonomy")[1])

lut <- ST_unique_list[["subgroup"]]
lut <- do.call('c', ST_unique_list)

# lookup table sorted from largest to smallest (most specific to least)
lut <- lut[order(nchar(lut), decreasing = TRUE)]
Expand All @@ -40,13 +40,16 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {
subgroup.idx <- sapply(res, function(x) which(!is.na(x[,1]))[1])
subgroup.pos <- sapply(seq_along(subgroup.idx), function(i) res[[i]][subgroup.idx[i], 'start'])

subgroups <- lut[subgroup.idx]
taxname <- lut[subgroup.idx]
lowest_level <- taxon_to_level(taxname)
family_classes <- trimws(substr(family, 0, subgroup.pos - 1))

taxon_codes <- taxon_to_taxon_code(taxname)
res <- data.frame(row.names = NULL, stringsAsFactors = FALSE,
family = family,
subgroup = subgroups,
subgroup_code = taxon_to_taxon_code(subgroups),
family = ifelse(nchar(family_classes) > 0, family, NA_character_),
taxclname = family,
taxonname = taxname,
subgroup_code = ifelse(lowest_level == "subgroup", taxon_codes, NA_character_),
code = taxon_codes,
class_string = family_classes,
classes_split = I(lapply(strsplit(family_classes, ","), trimws)))

Expand All @@ -56,7 +59,7 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {
}

#' @import data.table
#' @importFrom utils type.convert
#' @importFrom utils type.convert tail
#' @importFrom stats setNames na.omit
.get_family_differentiae <- function(res, flat = TRUE) {

Expand Down Expand Up @@ -125,10 +128,13 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {
)
})

taxsub <- as.data.frame(do.call('rbind', lapply(decompose_taxon_code(res$subgroup_code), function(x) taxon_code_to_taxon(as.character(rev(x))))),
stringsAsFactors = FALSE)
colnames(taxsub) <- rev(c("taxorder", "taxsuborder", "taxgrtgroup", "taxsubgrp"))
rownames(taxsub) <- NULL
taxsub <- as.data.frame(data.table::rbindlist(lapply(decompose_taxon_code(res$code), function(x) {
y <- taxon_code_to_taxon(as.character(rev(x)))
z <- data.frame(taxsubgrp = NA_character_, taxgrtgroup = NA_character_,
taxsuborder = NA_character_, taxorder = NA_character_)
z[1, ] <- tail(c(rep(NA_character_, 4), y), 4)
z
})), stringsAsFactors = FALSE)

res4 <- lapply(seq_along(res2), function(i) {
x <- res2[[i]]
Expand Down Expand Up @@ -160,9 +166,9 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {

res5 <- as.list(data.table::rbindlist(c(list(basetbl), res4), fill = TRUE))
multi.names <- c("taxminalogy", "taxfamother")
.FUN <- function(x) list(x)
.flat_FUN <- function(x) {
y <- paste0(na.omit(x), collapse = ", ")
.FUN <- function(x, sep = NULL) list(x)
.flat_FUN <- function(x, sep = ", ") {
y <- paste0(na.omit(x), collapse = sep)
if (nchar(y) == 0) return(NA_character_)
y
}
Expand All @@ -171,7 +177,8 @@ parse_family <- function(family, column_metadata = TRUE, flat = TRUE) {
}

res5[multi.names] <- lapply(multi.names, function(n) {
res6 <- apply(data.frame(res5[names(res5) %in% n]), 1, .FUN)
res6 <- apply(data.frame(res5[names(res5) %in% n]), 1, .FUN,
sep = ifelse(n == "taxminalogy", " over ", ", "))
res6 <- lapply(res6, function(nn) {
nnn <- nn[[1]]
lr6 <- length(nnn)
Expand Down
26 changes: 21 additions & 5 deletions tests/testthat/test-parseFamily.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ test_that("parse_family(..., column_metadata = FALSE) works", {

res <- parse_family(families, column_metadata = FALSE)

expect_equal(res$subgroup_code,
expect_equal(res$code,
c("HCDN", "JDGR", "BDDH", "KFFK",
"HCDN", "JDGR", "BDDH", "KFFK"))
})
Expand All @@ -29,15 +29,15 @@ test_that("parse_family(..., column_metadata = TRUE) works", {

res <- parse_family(families, column_metadata = TRUE)

expect_equal(res$subgroup_code,
expect_equal(res$code,
c("HCDN", "JDGR", "BDDH", "KFFK",
"HCDN", "JDGR", "BDDH", "KFFK"))
})

test_that("complex or uncommon family classes", {

skip_if_not_installed("soilDB")

# mapping of "diatomaceous" mineralogy class -> "diatomaceous earth" choicename for taxminalogy
x <- parse_family("DIATOMACEOUS, EUIC, FRIGID LIMNIC HAPLOHEMISTS")
expect_true(x$taxminalogy == "diatomaceous earth" && x$taxreaction == "euic")
Expand All @@ -48,7 +48,7 @@ test_that("complex or uncommon family classes", {

# compound family classes such as "amorphic over isotic" for strongly contrasting control section
x <- parse_family("MEDIAL-SKELETAL OVER LOAMY-SKELETAL, AMORPHIC OVER ISOTIC, FRIGID ANDIC HAPLORTHODS")
expect_true(x$taxminalogy == "amorphic, isotic" &&
expect_true(x$taxminalogy == "amorphic over isotic" &&
x$taxpartsize == "medial-skeletal over loamy-skeletal",
x$classes_split[[1]][2] == "AMORPHIC OVER ISOTIC")

Expand All @@ -67,7 +67,7 @@ test_that("complex or uncommon family classes", {
"FINE, MIXED, ACTIVE, MESIC OXYAQUIC HAPLUDALFS",
"MEDIAL-SKELETAL OVER LOAMY-SKELETAL, AMORPHIC OVER ISOTIC, FRIGID ANDIC HAPLORTHODS"),
flat = TRUE)
expect_equal(x$taxminalogy, c("isotic", "smectitic", "isotic", "mixed", "amorphic, isotic"))
expect_equal(x$taxminalogy, c("isotic", "smectitic", "isotic", "mixed", "amorphic over isotic"))
expect_equal(x$taxfamother, c(NA, NA, "shallow, ortstein", NA, NA))

# test flat=FALSE (many taxa)
Expand All @@ -83,3 +83,19 @@ test_that("complex or uncommon family classes", {
c(taxminalogy1 = "mixed", taxminalogy2 = NA),
c(taxminalogy1 = "amorphic", taxminalogy2 = "isotic"))))
})

test_that("taxa above family and incomplete family names", {
x <- data.frame(
taxonname = c("Alberti", "Aquents", "Lithic Xeric Torriorthents", "Stagy Family", "Haplodurids"),
taxonkind = c("series", "taxon above family", "taxon above family", "family", "taxon above family"),
taxclname = c(
"Clayey, smectitic, thermic, shallow Vertic Rhodoxeralfs", # Full family name
"Aquents", # Taxon above subgroup
"Lithic Xeric Torriorthents", # Subgroup
"Coarse-loamy, mixed, mesic Duric Haploxerolls", # Family name missing activity class
"Mixed, superactive, thermic Haplodurids" # Taxon above family (family classes + great group)
))
res <- parse_family(x$taxclname)
expect_equal(res$taxsuborder, c("Xeralfs", "Aquents", "Orthents", "Xerolls", "Durids"))
expect_equal(res$taxgrtgroup, c("Rhodoxeralfs", NA_character_, "Torriorthents", "Haploxerolls", "Haplodurids"))
})

0 comments on commit d04c9b0

Please sign in to comment.