Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update OSD parsing: effervescence, drop ordered factors #68

Merged
merged 17 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
40 changes: 14 additions & 26 deletions R/parseOSD_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@
m <- tolower(m)

# convert to ordered factor
m <- factor(m, levels = textures, ordered = TRUE)
#
# m <- factor(m, levels = textures, ordered = TRUE)
#
# factors cannot be preserved in JSON output, and won't work for multiple classes/ranges of classes

return(m)
}
Expand Down Expand Up @@ -196,33 +199,19 @@
m <- tolower(m)

# return as an ordered factor acidic -> basic
m <- factor(m, levels = pH_classes, ordered = TRUE)
# m <- factor(m, levels = pH_classes, ordered = TRUE)
# factors cannot be preserved in JSON output, and won't work for multiple classes/ranges of classes

return(m)
}



# vectorized parsing of effervescence class
#' @importFrom stringi stri_match
.parse_eff_class <- function(text) {

# effervescence classes, ordered from least to most effervescent
.classes <- c('noneffervescent', 'very slightly effervescent', 'slightly effervescent', 'strongly effervescent', 'violently effervescent')

## 2019-05-29: generalized for all non-greedy, exact matching
m <- .findClass(needle = .classes, haystack = text)
m <- tolower(m)

# return as an ordered factor noneffervescent -> violently effervescent
m <- factor(m, levels = .classes, ordered = TRUE)

return(m)

.parse_eff_class <- function(x) {
# Extract the effervescence phrase from narrative text `x`.
#
# `x` is passed straight to gsub(), so each element of a character vector is
# processed independently. Matching is case-insensitive and the matched text
# is returned as written in the narrative (it is not lower-cased or mapped
# onto a fixed class list).
#
# The pattern is a single alternation; every alternative except the last
# consumes the whole string and captures only the phrase of interest:
#   group 1: "very <word> effervescen* (to|and|or) <words> effervescen*"
#   group 3: "<word> effervescen* (to|and|or) <word> effervescen*"
#            (the optional space also admits fused forms such as
#            "noneffervescent")
#   group 5: "very <word> effervescen*" preceded by a required ";" or ","
#   group 6: "<word> effervescen*"      preceded by a required ";" or ","
#   group 7: as group 5, separator optional
#   group 8: as group 6, separator optional
#   final ".*": matches any text without capturing, so input with no
#            effervescence phrase is replaced by the empty string
# "effervescen[tce]+" accepts both "effervescent" and "effervescence".
# Groups 2 and 4 are the nested (to|and|or) conjunctions; they are already
# contained in groups 1 and 3 and are therefore left out of the replacement.
#
# The replacement concatenates groups 1, 3, 5, 6, 7 and 8; groups belonging
# to alternatives that did not participate in the match contribute nothing.
#
# NOTE(review): .zerochar_to_na() is defined elsewhere; presumably it turns
# the zero-length strings produced for non-matching input into NA -- confirm.
.zerochar_to_na(gsub("^.*[;,]? ?\\b(very [a-z]+ effervescen[tce]+ (to|and|or) [a-z ]+ ?effervescen[tce]+).*$|^.*[;,]? ?\\b([a-z]+ ?effervescen[tce]+ (to|and|or) [a-z]+ ?effervescen[tce]+).*$|^.*[;,] ?\\b(very [a-z]+ effervescen[tce]+).*$|^.*[;,] ?\\b([a-z]+ ?effervescen[tce]+).*$|^.*[;,]? ?\\b(very [a-z]+ effervescen[tce]+).*$|^.*[;,]? ?\\b([a-z]+ ?effervescen[tce]+).*$|.*",
"\\1\\3\\5\\6\\7\\8", x, ignore.case = TRUE))
# result is left as character rather than an ordered factor:
# factors cannot be preserved in JSON output, and won't work for multiple classes/ranges of classes
}



# vectorized parsing of drainage class
#' @importFrom stringi stri_match
.parse_drainage_class <- function(text) {
Expand All @@ -248,7 +237,6 @@

# return as an ordered factor
# m <- factor(m, levels = classes, ordered = TRUE)

# factors cannot be preserved in JSON output, and won't work for multiple classes/ranges of classes

return(m)
Expand Down Expand Up @@ -324,11 +312,11 @@
# "O" = "0"
# "l" = "1"
## ideas: http://stackoverflow.com/questions/15474741/python-regex-optional-capture-group
## TODO: it isn't clear if the new files will be in

## TODO: it isn't clear if the new files will be in
# expect em dashes (\u2014) used after horizon designation as of May 2023
# https://github.com/ncss-tech/SoilKnowledgeBase/issues/64

# detect horizons with both top and bottom depths
hz.rule <- "([\\^\\'\\/a-zA-Z0-9]+)\\s*[-=\u2014]+\\s*([Ol0-9.]+)\\s*?(to|-)?\\s+?([Ol0-9.]+)\\s*?(in|inches|cm|centimeters)"

Expand Down Expand Up @@ -494,7 +482,7 @@
res$pH <- .parse_pH(narrative.data$narrative)
res$pH_class <- .parse_pH_class(narrative.data$narrative)
res$eff_class <- .parse_eff_class(narrative.data$narrative)

bdy <- .parse_hz_boundary(narrative.data$narrative)
res$distinctness <- bdy$distinctness
res$topography <- bdy$topography
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/AAZDAHL.json
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "gradual",
"topography": "wavy",
"narrative": "Bk--20 to 36 inches; grayish brown (2.5Y 5/2) clay loam; weak medium subangular blocky structure; friable; about 5 percent coarse fragments; violent effervescence; moderately alkaline; gradual wavy boundary. (0 to 20 inches thick)"
Expand All @@ -158,7 +158,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "NA",
"topography": "NA",
"narrative": "C--36 to 60 inches; light olive brown (2.5Y 5/4) clay loam; common fine faint yellowish brown (10YR 5/6) and few fine prominent gray (5Y 5/1) mottles; massive; friable; about 5 percent coarse fragments; strong effervescence; moderately alkaline."
Expand Down
8 changes: 4 additions & 4 deletions inst/extdata/OSD/A/ABERDEEN.json
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "slightly alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "clear",
"topography": "wavy",
"narrative": "Bkyz1--26 to 31 inches; light gray (2.5Y 7/2) silty clay loam, grayish brown (2.5Y 5/2) moist; weak medium prismatic structure parting to weak medium subangular blocky; hard, firm, sticky and plastic; common fine nests of gypsum and other salts; common very fine accumulations of carbonate; strong effervescence; slightly alkaline; clear wavy boundary."
Expand All @@ -204,7 +204,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "slightly alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "gradual",
"topography": "wavy",
"narrative": "Bkyz2--31 to 38 inches; light brownish gray (2.5Y 6/2) silty clay loam, dark grayish brown (2.5Y 4/2) moist; weak medium prismatic structure parting to weak medium subangular blocky; hard, firm, sticky and plastic; common fine nests of gypsum and other salts; common very fine accumulations of carbonate; strong effervescence; slightly alkaline; gradual wavy boundary. (Combined Bkyz horizons 0 to 20 inches thick)"
Expand All @@ -227,7 +227,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "slightly alkaline",
"eff_class": "NA",
"eff_class": "slight effervescence",
"distinctness": "gradual",
"topography": "wavy",
"narrative": "C1--38 to 51 inches; light gray (2.5Y 7/2) silt loam, grayish brown (2.5Y 5/2) moist; few fine prominent of yellowish brown (10YR 5/6) redoximorphic concentrations and few fine distinct gray (10YR 5/1) moist redoximorphic depletions; massive; hard, friable, slightly sticky and slightly plastic; few fine threads and nests of gypsum and other salts; slight effervescence; slightly alkaline; gradual wavy boundary."
Expand All @@ -250,7 +250,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "slightly alkaline",
"eff_class": "NA",
"eff_class": "slight effervescence",
"distinctness": "NA",
"topography": "NA",
"narrative": "C2--51 to 60 inches; pale yellow (2.5Y 7/4) silt loam, laminated with thin layers of silty clay and very fine sandy loam, light olive brown (2.5Y 5/4) moist; common fine prominent yellowish brown (10YR 5/6) and strong brown (7.5YR 5/6) redoximorphic concentrations, and gray (10YR 5/1) moist redoximorphic depletions; massive; laminations 1 to 3 mm thick; slightly hard, friable, slightly sticky and slightly plastic; slight effervescence; slightly alkaline."
Expand Down
8 changes: 4 additions & 4 deletions inst/extdata/OSD/A/ACKETT.json
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
"cf_class": "extremely cobbly",
"pH": 8.4,
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "abrupt",
"topography": "smooth",
"narrative": "2Btkq--15 to 20 cm; light gray (10YR 7/2) extremely cobbly clay, very pale brown (10YR 7/3) moist; massive; extremely hard, extremely firm, moderately sticky and moderately plastic; common distinct clay films on faces of peds and in pores; many continuous silica cemented laminations throughout; fractures are 0.6 cm wide and are 20 cm apart; few very fine roots in fractures; few very fine tubular pores; 70 percent cobbles, 10 percent gravel; violent effervescence; moderately alkaline (pH 8.4); abrupt smooth boundary. (0 to 8 cm thick)"
Expand All @@ -181,7 +181,7 @@
"cf_class": "very gravelly",
"pH": 8.4,
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "abrupt",
"topography": "smooth",
"narrative": "2Btk--20 to 36 cm; very pale brown (10YR 7/3) very gravelly clay loam, pale brown (10YR 6/3) moist; weak medium subangular blocky structure; hard, firm, moderately sticky and moderately plastic; few very fine and fine roots; few very fine tubular pores; common distinct clay films on faces of peds and in pores; 40 percent gravel size duripan fragments; violent effervescence (25 percent calcium carbonate); moderately alkaline (pH 8.4); abrupt smooth boundary. (8 to 18 cm thick)"
Expand All @@ -204,7 +204,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "NA",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "abrupt",
"topography": "smooth",
"narrative": "2Bkqm--36 to 71 cm; white (10YR 8/1) fractured indurated duripan; very pale brown (10YR 7/3) moist; massive; many continuous silica cemented laminations throughout the duripan; fractures are 0.6 cm wide and are 50 cm apart; few very fine roots in fractures of the duripan; violent effervescence; abrupt smooth boundary. (20 to 38 cm thick)"
Expand All @@ -227,7 +227,7 @@
"cf_class": "extremely gravelly",
"pH": 8.2,
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "abrupt",
"topography": "smooth",
"narrative": "3Bkq--71 to 127 cm; multi-colored extremely gravelly sand; single grain; loose in places; 80 percent gravel; weak carbonate-silica cementation with carbonate pendants on undersides of gravel; violent effervescence; moderately alkaline (pH 8.2); abrupt smooth boundary. (25 to 60 cm)"
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ADE.json
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "NA",
"topography": "NA",
"narrative": "C--60 to 70 inches; brownish yellow (10YR 6/6) fine sand; single grain; loose; strong effervescence; moderately alkaline."
Expand Down
8 changes: 4 additions & 4 deletions inst/extdata/OSD/A/AGAR.json
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "slightly alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "clear",
"topography": "wavy",
"narrative": "Bk1--18 to 22 inches; grayish brown (10YR 5/2) silty clay loam, dark grayish brown (10YR 4/2) moist; weak coarse prismatic structure parting to moderate medium and fine subangular blocky; hard, friable; few fine accumulations of carbonate; strong effervescence; slightly alkaline; clear wavy boundary."
Expand All @@ -181,7 +181,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "gradual",
"topography": "wavy",
"narrative": "Bk2--22 to 34 inches; light brownish gray (2.5Y 6/2) silty clay loam, dark grayish brown (2.5Y 4/2) moist; weak coarse prismatic structure parting to weak medium subangular blocky; hard, friable; common medium accumulations of carbonate; strong effervescence; moderately alkaline; gradual wavy boundary."
Expand All @@ -204,7 +204,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "gradual",
"topography": "wavy",
"narrative": "Bk3--34 to 40 inches; light brownish gray (2.5Y 6/2) silt loam, grayish brown (2.5Y 5/2) moist; weak medium subangular blocky structure; hard, friable; common medium accumulations of carbonate; strong effervescence; moderately alkaline; gradual wavy boundary. (Combined Bk horizon is 10 to 25 inches.)"
Expand All @@ -227,7 +227,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "NA",
"topography": "NA",
"narrative": "C3--40 to 61 inches; light brownish gray (2.5Y 6/2) silt loam, dark grayish brown (2.5Y 4/2) moist; massive; slightly hard, friable; few fine and medium accumulations of carbonate; strong effervescence; moderately alkaline."
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AGENCY.json
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "NA",
"eff_class": "NA",
"eff_class": "moderately effervescent",
"distinctness": "NA",
"topography": "NA",
"narrative": "2Crkq--29 to 33 inches; paralithic tuff with silica and calcium carbonate coatings along fractures; moderately effervescent. ( 0 to 6 inches thick )"
Expand Down
6 changes: 3 additions & 3 deletions inst/extdata/OSD/A/AGUACHIQUITA.json
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
"cf_class": "very gravelly",
"pH": 8.2,
"pH_class": "moderately alkaline",
"eff_class": "strongly effervescent",
"eff_class": "noneffervescent to strongly effervescent",
"distinctness": "clear",
"topography": "wavy",
"narrative": "Bkq--10 to 20 inches; pale brown (10YR 6/3) very gravelly coarse sandy loam, dark yellowish brown (10YR 4/4) moist; weak medium subangular blocky structure; slightly hard, very friable, nonsticky and nonplastic; common very fine, few fine and medium roots; few very fine and fine interstitial and tubular pores; 50 percent pebbles, 1 percent cobbles and 0.1 percent stones; common fine (1 to 2 millimeter) calcium carbonate and silica coats on bottoms of rock fragments; noneffervescent to strongly effervescent; moderately alkaline (pH 8.2); clear wavy boundary. (8 to 20 inches thick)"
Expand All @@ -158,7 +158,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "NA",
"eff_class": "strongly effervescent",
"eff_class": "noneffervescent to strongly effervescent",
"distinctness": "clear",
"topography": "wavy",
"narrative": "Bkqm1--20 to 31 inches; light brown (7.5YR 6/3) extremely weakly cemented duripan, brown (7.5YR 4/4) moist; massive; moderately hard, friable, brittle; few very fine and fine roots; few very fine and fine interstitial pores; 50 percent pebbles, 3 percent cobbles, and 1 percent stones; common fine (1 to 2 millimeter) calcium carbonate and silica coats on bottoms of rock fragments; noneffervescent to strongly effervescent; clear wavy boundary."
Expand All @@ -181,7 +181,7 @@
"cf_class": "NA",
"pH": "NA",
"pH_class": "NA",
"eff_class": "strongly effervescent",
"eff_class": "noneffervescent to strongly effervescent",
"distinctness": "abrupt",
"topography": "wavy",
"narrative": "Bkqm2--31 to 43 inches; light brown (7.5YR 6/3) very weakly cemented duripan, brown (7.5YR 4/4) moist; massive; hard, firm, brittle; few very fine and fine roots; few very fine and fine interstitial pores; 50 percent pebbles, 3 percent cobbles, and 1 percent stones; common fine (1 to 2 millimeter) calcium carbonate and silica coats on bottoms of rock fragments; noneffervescent to strongly effervescent; abrupt wavy boundary. (Combined thickness of the Bkqm horizons is 15 to 35 inches thick)"
Expand Down
10 changes: 5 additions & 5 deletions inst/extdata/OSD/A/AGUILAR.json
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
"cf_class": "NA",
"pH": 8.8,
"pH_class": "strongly alkaline",
"eff_class": "NA",
"eff_class": "slight effervescence",
"distinctness": "clear",
"topography": "smooth",
"narrative": "Btn2--10 to 14 inches (25 to 36 cm); brown (10YR 5/3), silty clay, brown (10YR 4/3) moist; moderate coarse columnar structure parting to strong medium subangular blocks; extremely hard, very firm, very sticky and very plastic; few very fine and fine roots; common continuous distinct clay films on faces of peds; common fine salt threads; slight effervescence, 21 percent sodium absorption ratio; strongly alkaline (pH 8.8); clear smooth boundary. (combined thickness of the Btn horizons is 8 to 14 inches)"
Expand All @@ -158,7 +158,7 @@
"cf_class": "NA",
"pH": 8.8,
"pH_class": "strongly alkaline",
"eff_class": "NA",
"eff_class": "strong effervescence",
"distinctness": "clear",
"topography": "smooth",
"narrative": "Btny--14 to 23 inches (36 to 58 cm); brown (10YR 5/3) clay, brown (10YR 4/3) moist; moderate medium columnar structure parting to strong medium angular blocks; extremely hard, very firm, very sticky and very plastic; few very fine roots; common continuous distinct clay films on faces of peds; common medium salt and gypsum threads; strong effervescence, 21 percent sodium absorption ratio; strongly alkaline (pH 8.8); clear smooth boundary. (7 to 14 inches thick)"
Expand All @@ -181,7 +181,7 @@
"cf_class": "NA",
"pH": 8.6,
"pH_class": "strongly alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "clear",
"topography": "smooth",
"narrative": "Btkny--23 to 29 inches (58 to 74 cm); brown (10YR 5/3) clay loam, brown (10YR 4/3) moist; strong medium subangular blocky structure; extremely hard, very firm, moderately sticky and moderately plastic; few patchy distinct clay films on faces of peds; common coarse carbonate threads and common coarse salt and gypsum masses; violent effervescence, 22 percent sodium absorption ratio; strongly alkaline (pH 8.6); clear smooth boundary. (2 to 10 inches thick)"
Expand All @@ -204,7 +204,7 @@
"cf_class": "NA",
"pH": 8.4,
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "gradual",
"topography": "smooth",
"narrative": "Bkny--29 to 45 inches (74 to 114 cm); grayish brown (10YR 5/2) silty clay loam, brown (10YR 5/3) moist; 18 percent sand; moderate medium subangular blocky structure; very hard, firm, moderately sticky and moderately plastic; common medium salt and gypsum threads; common medium distinct carbonate threads; violent effervescence (3 percent calcium carbonate equivalent), 22 percent sodium absorption ratio; moderately alkaline (pH 8.4); gradual smooth boundary. (10 to 20 inches thick)"
Expand All @@ -227,7 +227,7 @@
"cf_class": "NA",
"pH": 8.2,
"pH_class": "moderately alkaline",
"eff_class": "NA",
"eff_class": "violent effervescence",
"distinctness": "NA",
"topography": "NA",
"narrative": "Bny--45 to 65 inches (114 to 165 cm); grayish brown (10YR 5/2) silty clay loam, brown (10YR 5/3) moist; weak medium subangular blocky structure; very hard, firm, moderately sticky and moderately plastic; common medium salt and gypsum threads; common medium carbonate threads; violent effervescence (2 percent calcium carbonate equivalent); 21 percent sodium absorption ratio; moderately alkaline (pH 8.2)."
Expand Down
Loading