Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fetchNASIS upgrades #211

Merged
merged 3 commits into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/fetchNASISWebReport.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ fetchNASISWebReport <- function(projectname, rmHzErrors = FALSE, fill = FALSE,

# print any messages on possible data quality problems:
if (exists('component.hz.problems', envir=soilDB.env))
message("-> QC: horizon errors detected, use `get('component.hz.problems', envir=soilDB.env)` for related cokey values")
message("-> QC: horizon errors detected:\n\tUse `get('component.hz.problems', envir=soilDB.env)` for component keys (cokey)")

# done, return SPC
return(list(spc = f.chorizon, mapunit = f.mapunit))
Expand Down
33 changes: 18 additions & 15 deletions R/fetchNASIS_components.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,33 @@
# add site data to object
site(f.chorizon) <- f.comp # left-join via coiid

## TODO: convert all ddply() calls into split() -> lapply() -> do.call('rbind')

# join-in copm strings
## 2017-3-13: short-circuts need testing, consider pre-marking mistakes before parsing
pm <- plyr::ddply(f.copm, 'coiid', .formatcoParentMaterialString, name.sep=' & ')
if(nrow(pm) > 0)
## 2021-10-28: TODO: harmonize strategies for .formatXXXXString methods and ID variables
.SD <- NULL
.BY <- NULL

# join-in copm strings
pm <- data.table::data.table(f.copm)[, .formatParentMaterialString(.SD, uid = .BY$coiid, name.sep=' & '), by = "coiid"]
pm$siteiid <- NULL
if (nrow(pm) > 0)
site(f.chorizon) <- pm

# join-in cogeomorph strings
## 2017-3-13: short-circuts need testing, consider pre-marking mistakes before parsing
lf <- plyr::ddply(f.cogeomorph, 'coiid', .formatcoLandformString, name.sep=' & ')
if(nrow(lf) > 0)
lf <- data.table::data.table(f.cogeomorph)[, .formatLandformString(.SD, uid = .BY$coiid, name.sep=' & '), by = "coiid"]
pm$peiid <- NULL
if (nrow(lf) > 0)
site(f.chorizon) <- lf

# join-in ecosite string
## 2017-3-06: short-circuts need testing, consider pre-marking mistakes before parsing
es <- plyr::ddply(f.ecosite, 'coiid', .formatEcositeString, name.sep=' & ')
if(nrow(es) > 0)
es <- data.table::data.table(f.ecosite)[, .formatEcositeString(.SD, name.sep=' & '), by = "coiid", .SDcols = colnames(f.ecosite)]
es$coiid <- NULL
if (nrow(es) > 0)
site(f.chorizon) <- es

# join-in othervegclass string
## 2017-3-06: short-circuts need testing, consider pre-marking mistakes before parsing
ov <- plyr::ddply(f.otherveg, 'coiid', .formatOtherVegString, name.sep=' & ')
if(nrow(ov) > 0)
ov <- data.table::data.table(f.otherveg)[, .formatOtherVegString(.SD, name.sep=' & '), by = "coiid", .SDcols = colnames(f.otherveg)]
ov$coiid <- NULL
if (nrow(ov) > 0)
site(f.chorizon) <- ov

# add diagnostic features to SPC
Expand All @@ -101,7 +104,7 @@
# print any messages on possible data quality problems:
if(exists('component.hz.problems', envir=soilDB.env))
if(length(get("component.hz.problems", envir = soilDB.env)) > 0)
message("-> QC: horizon errors detected, use `get('component.hz.problems', envir=soilDB.env)` for related coiid values")
message("-> QC: horizon errors detected:\n\tUse `get('component.hz.problems', envir=soilDB.env)` for component record IDs (coiid)")

# set NASIS component specific horizon identifier
if(!fill & length(filled.ids) == 0) {
Expand Down
10 changes: 5 additions & 5 deletions R/fetchNASIS_pedons.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,11 @@
# print any messages on possible data quality problems:
if (exists('sites.missing.pedons', envir = soilDB.env))
if (length(get('sites.missing.pedons', envir = soilDB.env)) > 0)
message("-> QC: sites without pedons: see `get('sites.missing.pedons', envir=soilDB.env)`")
message("-> QC: sites without pedons: \n\tUse `get('sites.missing.pedons', envir=soilDB.env) for site record IDs (siteiid)`")

if (exists('dup.pedon.ids', envir = soilDB.env))
if (length(get('dup.pedon.ids', envir = soilDB.env)) > 0)
message("-> QC: duplicate pedons: see `get('dup.pedon.ids', envir=soilDB.env)`")
message("-> QC: duplicate pedons: \n\tUse `get('dup.pedon.ids', envir=soilDB.env) for pedon record IDs (peiid)`")

# set NASIS component specific horizon identifier
if(!fill & length(filled.ids) == 0) {
Expand All @@ -282,15 +282,15 @@

if (exists('bad.pedon.ids', envir = soilDB.env))
if (length(get('bad.pedon.ids', envir = soilDB.env)) > 0)
message("-> QC: horizon errors detected, use `get('bad.pedon.ids', envir=soilDB.env)` for related userpedonid values or `get('bad.horizons', envir=soilDB.env)` for related horizon designations")
message("-> QC: horizon errors detected:\n\tUse `get('bad.pedon.ids', envir=soilDB.env)` for pedon record IDs (peiid)\n\tUse `get('bad.horizons', envir=soilDB.env)` for horizon designations")

if (exists('missing.bottom.depths', envir = soilDB.env))
if (length(get('missing.bottom.depths', envir = soilDB.env)) > 0)
message("-> QC: pedons missing bottom hz depths: use `get('missing.bottom.depths', envir=soilDB.env)` for related pedon IDs")
message("-> QC: pedons missing bottom hz depths:\n\tUse `get('missing.bottom.depths', envir=soilDB.env)` for pedon record IDs (peiid)")

if (exists('top.bottom.equal', envir = soilDB.env))
if (length(get('top.bottom.equal', envir = soilDB.env)) > 0)
message("-> QC: equal hz top and bottom depths: use `get('top.bottom.equal', envir=soilDB.env)` for related pedon IDs")
message("-> QC: equal hz top and bottom depths:\n\tUse `get('top.bottom.equal', envir=soilDB.env)` for pedon record IDs (peiid)")

# done
return(hz_data)
Expand Down
29 changes: 20 additions & 9 deletions R/get_component_data_from_NASIS_db.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ get_component_data_from_NASIS_db <- function(SS = TRUE,
if (length(idx) > 0) {
dupes <- names(idx)
assign('dupe.coiids', value=dupes, envir=soilDB.env)
message("-> QC: duplicate coiids, this should not happen. Use `get('dupe.coiids', envir=soilDB.env)` for related coiid values.")
message("-> QC: duplicate coiids, this should not happen.\n\tUse `get('dupe.coiids', envir=soilDB.env)` for component record IDs (coiid)")
}

# uncode metadata domains
Expand Down Expand Up @@ -485,7 +485,7 @@ get_component_correlation_data_from_NASIS_db <- function(SS = TRUE,
if(length(idx) > 0) {
dupes <- names(idx)
assign('dupe.muiids', value=dupes, envir=soilDB.env)
message("-> QC: duplicate muiids: multiple 'representative' DMU / MU?. Use `get('dupe.muiids', envir=soilDB.env)` for related muiid values.")
message("-> QC: duplicate muiids: multiple 'representative' DMU / MU?.\n\tUse `get('dupe.muiids', envir=soilDB.env)` for mapunit record IDs (muiid)")
}


Expand All @@ -494,7 +494,7 @@ get_component_correlation_data_from_NASIS_db <- function(SS = TRUE,
if(length(idx) > 0) {
dupes <- names(idx)
assign('multiple.mu.per.dmu', value=dupes, envir=soilDB.env)
message("-> QC: DMUs assigned to multiple MU. Use `get('multiple.mu.per.dmu', envir=soilDB.env)` for related dmuiid values.")
message("-> QC: DMUs assigned to multiple MU.\n\tUse `get('multiple.mu.per.dmu', envir=soilDB.env)` for data mapunit record IDs (dmuiid)")
}


Expand Down Expand Up @@ -597,7 +597,7 @@ get_component_esd_data_from_NASIS_db <- function(SS = TRUE,
dupes <- names(idx)
assign('multiple.ecosite.per.coiid', value=dupes, envir=soilDB.env)
if (length(idx) > 0) {
message("-> QC: multiple ecosites / component. Use `get('multiple.ecosite.per.coiid', envir=soilDB.env)` for related coiid values.")
message("-> QC: multiple ecosites / component.\n\tUse `get('multiple.ecosite.per.coiid', envir=soilDB.env)` for component record IDs (coiid)")
}

# uncode metadata domains
Expand Down Expand Up @@ -635,7 +635,7 @@ get_component_otherveg_data_from_NASIS_db <- function(SS = TRUE, dsn = NULL) {
if (length(idx) > 0) {
dupes <- names(idx)
assign('multiple.otherveg.per.coiid', value=dupes, envir=soilDB.env)
message("-> QC: multiple othervegclasses / component. Use `get('multiple.otherveg.per.coiid', envir=soilDB.env)` for related coiid values.")
message("-> QC: multiple othervegclasses / component.\n\tUse `get('multiple.otherveg.per.coiid', envir=soilDB.env)` for component record IDs (coiid)")
}

# uncode metadata domains
Expand Down Expand Up @@ -829,7 +829,10 @@ get_component_horizon_data_from_NASIS_db <- function(SS = TRUE,
INNER JOIN datamapunit_View_1 dmu ON dmu.dmuiid = co.dmuiidref

ORDER BY dmudesc, comppct_r DESC, compname ASC, hzdept_r ASC;"


q2 <- "SELECT chiidref, fragvol_r AS fragvol, fragsize_r, fragshp, fraghard FROM chfrags_View_1"
q3 <- "SELECT chiidref, huartvol_r AS huartvol, huartsize_r, huartco, huartshp, huartrnd, huartpen, huartsafety, huartper FROM chhuarts_View_1"

channel <- dbConnectNASIS(dsn)

if (inherits(channel, 'try-error'))
Expand All @@ -838,19 +841,27 @@ get_component_horizon_data_from_NASIS_db <- function(SS = TRUE,
# toggle selected set vs. local DB
if (SS == FALSE) {
q <- gsub(pattern = '_View_1', replacement = '', x = q, fixed = TRUE)
q2 <- gsub(pattern = '_View_1', replacement = '', x = q2, fixed = TRUE)
q3 <- gsub(pattern = '_View_1', replacement = '', x = q3, fixed = TRUE)
}

# exec query
d <- dbQueryNASIS(channel, q)
d <- dbQueryNASIS(channel, q, close = FALSE)

## TODO: better documentation for "fill" argument
# https://github.com/ncss-tech/soilDB/issues/50
# remove records what are missing horizon data
if (fill == FALSE) {
d <- d[!is.na(d$chiid), ]
}


# "sieving" chfrags, chuarts tables for parity with fetchNASIS("pedons") @horizons slot columns
chf <- simplifyFragmentData(dbQueryNASIS(channel, q2, close = FALSE), id.var = 'chiidref')
cha <- simplifyArtifactData(dbQueryNASIS(channel, q3), id.var = 'chiidref')
d2 <- merge(d, chf, by.x = "chiid", by.y = "chiidref", all.x = TRUE, sort = FALSE)
d3 <- merge(d2, cha, by.x = "chiid", by.y = "chiidref", all.x = TRUE, sort = FALSE)

# done
return(d)
return(d3)
}

4 changes: 2 additions & 2 deletions R/get_component_from_SDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ get_component_from_SDA <- function(WHERE = NULL, duplicates = FALSE, childs = TR
idx <- d.component$cokey %in% cokeys

assign('component.ecosite.problems', value = cokeys, envir = soilDB.env)
message("-> QC: multiple ecosites linked to 1 component use `get('component.ecosite.problems', envir = soilDB.env)` for related cokey values")
message("-> QC: multiple ecosites linked to 1 component\n\tUse `get('component.ecosite.problems', envir = soilDB.env)` for component keys (cokey)")

nodups <- {
d.component[idx, ] ->.;
Expand Down Expand Up @@ -834,7 +834,7 @@ fetchSDA <- function(WHERE = NULL, duplicates = FALSE, childs = TRUE,

# print any messages on possible data quality problems:
if (exists('component.hz.problems', envir=soilDB.env))
message("-> QC: horizon errors detected, use `get('component.hz.problems', envir=soilDB.env)` for related cokey values")
message("-> QC: horizon errors detected, use `get('component.hz.problems', envir=soilDB.env)` for component keys (cokey)")

# done, return SPC
return(f.chorizon)
Expand Down
14 changes: 6 additions & 8 deletions R/simplfyFragmentData.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,23 +168,21 @@ simplifyFragmentData <- function(rf, id.var, nullFragsAreZero=TRUE) {

result.columns <- c('phiid', frag.classes, "total_frags_pct", "total_frags_pct_nopf")

# first of all, we can't do anything if the fragment volume is NA
# warn the user and remove the offending records
if(any(is.na(rf$fragvol))) {
rf <- rf[which(!is.na(rf$fragvol)), ]
warning('some records are missing rock fragment volume, these have been removed', call. = FALSE)
}

# warn the user and remove the NA records

# if all fragvol are NA then rf is an empty data.frame and we are done
if(nrow(rf[which(!is.na(rf$fragvol)),]) == 0) {
warning('all records are missing rock fragment volume (NULL). buffering result with NA. will be converted to zero if nullFragsAreZero = TRUE.', call. = FALSE)
message('all records are missing rock fragment volume')
dat <- as.data.frame(t(rep(NA, length(result.columns))))
for(i in 1:length(rf$phiid)) {
dat[i,] <- dat[1,]
dat[i,which(result.columns == id.var)] <- rf[[id.var]][i]
}
colnames(dat) <- result.columns
return(dat)
} else if(any(is.na(rf$fragvol))) {
rf <- rf[which(!is.na(rf$fragvol)), ]
message('NOTE: some records are missing rock fragment volume')
}

# extract classes
Expand Down
19 changes: 8 additions & 11 deletions R/simplifyArtifactData.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,21 @@ simplifyArtifactData <- function(art, id.var, nullFragsAreZero = nullFragsAreZer

result.columns <- c(id.var, art.classes, "total_art_pct", "huartvol_cohesive","huartvol_penetrable", "huartvol_innocuous", "huartvol_persistent")

# first of all, we can't do anything if the fragment volume is NA
# warn the user and remove the offending records
if(any(is.na(art$huartvol))) {
art <- art[which(!is.na(art$huartvol)), ]
warning('some records are missing artifact volume, these have been removed', call. = FALSE)
}

# warn the user and remove the NA records

# if all fragvol are NA then rf is an empty data.frame and we are done
if(nrow(art[which(!is.na(art$huartvol)), ]) == 0) {
warning('all records are missing artifact volume (NULL). buffering result with NA. will be converted to zero if nullFragsAreZero = TRUE.', call. = FALSE)
if (nrow(art[which(!is.na(art$huartvol)),]) == 0) {
message('NOTE: all records are missing artifact volume')
dat <- as.data.frame(t(rep(NA, length(result.columns))))
for(i in 1:length(art[[id.var]])) {
dat[i,] <- dat[1,]
dat[i,which(result.columns == id.var)] <- art[[id.var]][i]
}
colnames(dat) <- result.columns
return(dat)
} else if (any(is.na(art$huartvol))) {
art <- art[which(!is.na(art$huartvol)), ]
message('NOTE: some records are missing artifact volume')
}

# extract classes
Expand All @@ -97,7 +95,6 @@ simplifyArtifactData <- function(art, id.var, nullFragsAreZero = nullFragsAreZer
# fix defualt names from aggregate()
names(art.sums) <- c(id.var, 'class', 'volume')


## NOTE: we set factor levels here because the reshaping (long->wide) needs to account for all possible classes
## NOTE: this must include all classes that related functions return
# set levels of classes
Expand Down Expand Up @@ -141,7 +138,7 @@ simplifyArtifactData <- function(art, id.var, nullFragsAreZero = nullFragsAreZer
# compute total fragments
# trap no frag condition
# includes unspecified class
if(ncol(art.wide) > 1) {
if (ncol(art.wide) > 1) {
# calculate another column for total RF, ignoring parafractions
# index of columns to ignore, para*
#idx.pf <- grep(names(art.wide), pattern="para")
Expand Down
8 changes: 5 additions & 3 deletions tests/testthat/test-simplifyArtifactData.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ test_that("simplifyArtifactData when missing fragment sizes, low/rv/high", {
d.missing.size[4,] <- d.missing.size[3,]
d.missing.size[4,] <- NA
d.missing.size[4,'phiid'] <- "10102"
expect_warning(res <- simplifyArtifactData(d.missing.size, id.var = 'phiid', nullFragsAreZero = TRUE))
expect_message(res <- simplifyArtifactData(d.missing.size, id.var = 'phiid', nullFragsAreZero = TRUE))

# rows missing fragvol should be removed from the simplified result
expect_true(nrow(d.missing.size) == 4)
Expand All @@ -171,14 +171,16 @@ test_that("simplifyArtifactData warning generated when NA in huartvol", {
d.missing.artvol <- d.artifact.hz
d.missing.artvol$huartvol <- NA
d.missing.artvol[1,'huartvol'] <- 10
expect_warning(simplifyArtifactData(d.missing.artvol, id.var = 'phiid', nullFragsAreZero = TRUE), regexp = 'some records are missing artifact volume, these have been removed')
expect_message(simplifyArtifactData(d.missing.artvol, id.var = 'phiid', nullFragsAreZero = TRUE),
regexp = 'some records are missing artifact volume')
})


test_that("simplifyArtifactData warning generated when all fragvol are NA", {
d.all.NA.artvol <- d.artifact.hz
d.all.NA.artvol$huartvol <- NA
expect_warning(simplifyArtifactData(d.all.NA.artvol, id.var = 'phiid', nullFragsAreZero = TRUE), regexp = 'all records are missing artifact volume')
expect_message(simplifyArtifactData(d.all.NA.artvol, id.var = 'phiid', nullFragsAreZero = TRUE),
regexp = 'all records are missing artifact volume')
})

test_that("simplifyArtifactData nullFragsAreZero works as expected", {
Expand Down
22 changes: 8 additions & 14 deletions tests/testthat/test-simplifyFragmentData.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,6 @@ d.missing.size <-
)




test_that(".seive correctly skips / pads NA", {
expect_equal(soilDB:::.sieve(diameter = c(NA, 55)), c(NA, 'gravel'))
})
Expand Down Expand Up @@ -295,7 +293,6 @@ test_that("seive returns correct size class, flat, fragments", {
})



test_that("seive returns correct size class, nonflat, parafragments", {

expect_equal(soilDB:::.sieve(diameter = 4, flat = FALSE, para = TRUE), 'parafine_gravel')
Expand Down Expand Up @@ -329,7 +326,6 @@ test_that("seive returns correct size class, flat, parafragments", {
})



## new tests for rockFragmentSieve: missing frag sizes / unspecified class
test_that("rockFragmentSieve puts fragments without fragsize into 'unspecified' class", {

Expand All @@ -341,8 +337,6 @@ test_that("rockFragmentSieve puts fragments without fragsize into 'unspecified'
})




test_that("rockFragmentSieve assumptions are applied, results correct", {

d <- data.frame(fragvol=NA, fragsize_l=NA, fragsize_r=50, fragsize_h=NA, fragshp=NA, fraghard=NA)
Expand Down Expand Up @@ -422,8 +416,6 @@ test_that("rockFragmentSieve always uses the RV, computed when missing", {
})




test_that("rockFragmentSieve complex sample data from NASIS, single horizon", {

# pretty common, many fragments specified for a single horizon
Expand Down Expand Up @@ -456,13 +448,12 @@ test_that("simplifyFragmentData complex sample data from NASIS, single horizon",
})



test_that("simplifyFragmentData when missing fragment sizes, low/rv/high", {

# all fragments are coallated into the unspecified column
# totals should be correct
# some horizons have no fragment records, should generate a warning
expect_warning(res <- simplifyFragmentData(d.missing.size, id.var = 'phiid', nullFragsAreZero = TRUE))
expect_message( { res <- simplifyFragmentData(d.missing.size, id.var = 'phiid', nullFragsAreZero = TRUE) } )

# rows missing fragvol should be removed from the simplified result
expect_true(nrow(d.missing.size) == 12)
Expand All @@ -476,20 +467,23 @@ test_that("simplifyFragmentData when missing fragment sizes, low/rv/high", {

test_that("simplifyFragmentData warning generated when NA in fragvol", {

expect_warning(simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE), regexp = 'some records are missing rock fragment volume')
expect_message(simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE),
regexp = 'some records are missing rock fragment volume')

})


test_that("simplifyFragmentData warning generated when all fragvol are NA", {

expect_warning(simplifyFragmentData(d.all.NA.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE), regexp = 'all records are missing rock fragment volume')
expect_message(simplifyFragmentData(d.all.NA.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE),
regexp = 'all records are missing rock fragment volume')

})


test_that("simplifyFragmentData nullFragsAreZero works as expected", {
expect_warning(a <- simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = FALSE))
expect_warning(b <- simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE))
expect_message( { a <- simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = FALSE) } )
expect_message( { b <- simplifyFragmentData(d.missing.fragvol, id.var = 'phiid', nullFragsAreZero = TRUE) } )
expect_equal(as.logical(is.na(a)),
c(FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE,
FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE))
Expand Down