Skip to content

Commit

Permalink
add qualifier_group column for both primary and supplementary quali…
Browse files Browse the repository at this point in the history
…fiers

 - used to identify related terms
  • Loading branch information
brownag committed May 7, 2024
1 parent f03da16 commit 3ec0a49
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 11 deletions.
12 changes: 6 additions & 6 deletions R/data-documentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,12 @@
#' World Reference Base for Soil Resources (4th Edition, 2022)
#'
#' A _list_ containing three _data.frame_ elements `"rsg"`, `"pq"`, and `"sq"` providing information on the 'Reference Soil Groups', 'Principal Qualifiers', and 'Supplementary Qualifiers', respectively.
#'
#'
#' @details
#'
#' Each element has the column `"code"` which is a number (1-32) referring to the position in the Reference Soil Groups, and the column `"reference_soil_group"` which is the corresponding group name.
#' - The _data.frame_ `"rsg"` has column `"criteria"`, describing the logical criteria for each Reference Soil Group.
#' - The _data.frame_ `"pq"` has qualifier names in column `"principal_qualifier"` and `"sq"` has column `"supplementary_qualifier"`. The `"pq"` and `"sq"` qualifier names may be single qualifiers, or several related qualifiers separated with a forward slash `" / "`
#'
#' Each element has the column `"code"` which is a number (1-32) referring to the position in the Reference Soil Groups, and the column `"reference_soil_group"` which is the corresponding group name.
#' - The _data.frame_ `"rsg"` has column `"criteria"`, describing the logical criteria for each Reference Soil Group.
#' - The _data.frame_ `"pq"` has qualifier names in column `"principal_qualifiers"` and `"sq"` has column `"supplementary_qualifiers"`. These qualifier name columns contain individual qualifier terms. Related qualifiers can be identified using the `"qualifier_group"` column, which holds the original qualifier entry with its related terms separated by a forward slash `" / "`.
#'
#' @references
#'
Expand All @@ -114,4 +114,4 @@
#'
#' @keywords datasets
#'
"wrb_4th_2022"
"wrb_4th_2022"
18 changes: 14 additions & 4 deletions data-raw/wrb_4th_2022.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
library(pdftools)

## SETUP
##
##
# dir.create("misc/WRB2022")
# download.file("https://wrb.isric.org/files/WRB_fourth_edition_2022-12-18.pdf",
# destfile = "misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
Expand All @@ -18,7 +18,7 @@ library(pdftools)
## nope
# x <- pdf_data("misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
# y <- do.call('rbind', x)
#
#

# Read the lines of the WRB_RSG.txt file (path is relative to the working directory).
x <- readLines("misc/WRB2022/WRB_RSG.txt")
# Replace Unicode comparison signs with ASCII: U+2264 becomes "<=" and U+2265 becomes ">=".
# NOTE(review): "\u003c" and "\u003E" are R escapes for "<" and ">" themselves, so the two
# outer gsub() calls replace each character with itself -- confirm what they were meant to match.
x <- gsub("\u003c", "<", gsub("\u003E", ">", gsub("\u2264", "<=", gsub("\u2265", ">=", x))))
Expand Down Expand Up @@ -59,7 +59,12 @@ z <- lapply(xx, function(y) {
# Name the elements of `z` so they can be looked up by name below.
names(z) <- z.names

# Build one data.frame per element of `z` and row-bind them into `wrb_pq`.
wrb_pq <- do.call('rbind', lapply(seq(z), function(i) {
# NOTE(review): in this diff view the next line appears to be the pre-change row construction
# (one row per unsplit entry); its result is not assigned, and the data.frame() call at the end
# of the function body is the value returned -- confirm against the committed file.
data.frame(code = i, reference_soil_group = z.names[i], principal_qualifiers = z[[z.names[i]]])
# Split each entry on "/" and trim surrounding whitespace to get the individual qualifier terms.
pq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
# Repeat each unsplit entry once per term, so every term carries the group it came from.
pg <- lapply(seq(pq), function(j) rep(z[[z.names[i]]][j], length(pq[[j]])))
# One row per individual term; `code` and `reference_soil_group` are single values
# repeated on every row.
data.frame(code = i,
reference_soil_group = z.names[i],
qualifier_group = unlist(pg),
principal_qualifiers = unlist(pq))
}))
# Reset row names to the default sequence.
rownames(wrb_pq) <- NULL
# View(wrb_pq)
Expand All @@ -79,7 +84,12 @@ z <- lapply(xx, function(y) {
# Name the elements of `z` so they can be looked up by name below.
names(z) <- z.names

# Build one data.frame per element of `z` and row-bind them into `wrb_sq`.
wrb_sq <- do.call('rbind', lapply(seq(z), function(i) {
# NOTE(review): in this diff view the next line appears to be the pre-change row construction
# (one row per unsplit entry); its result is not assigned, and the data.frame() call at the end
# of the function body is the value returned -- confirm against the committed file.
data.frame(code = i, reference_soil_group = z.names[i], supplementary_qualifiers = z[[z.names[i]]])
# Split each entry on "/" and trim surrounding whitespace to get the individual qualifier terms.
sq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
# Repeat each unsplit entry once per term, so every term carries the group it came from.
sg <- lapply(seq(sq), function(j) rep(z[[z.names[i]]][j], length(sq[[j]])))
# One row per individual term; `code` and `reference_soil_group` are single values
# repeated on every row.
data.frame(code = i,
reference_soil_group = z.names[i],
qualifier_group = unlist(sg),
supplementary_qualifiers = unlist(sq))
}))
# Reset row names to the default sequence.
rownames(wrb_sq) <- NULL
# View(wrb_sq)
Expand Down
Binary file modified data/wrb_4th_2022.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/wrb_4th_2022.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3ec0a49

Please sign in to comment.