Skip to content

Commit

Permalink
ideas related to #43
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanbeaudette committed Mar 9, 2023
1 parent bf2657e commit 6517954
Showing 1 changed file with 29 additions and 3 deletions.
32 changes: 29 additions & 3 deletions misc/print-ST.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,45 @@ library(data.tree)
## TODO:
# * add subgroup acreage
# * 13th ed.
# * compact style of dir_tree(), but in proper order
# * add series count to subgroups

# 12th ed.
data("ST")

# latest SC DB
sc <- get_soilseries_from_NASIS()

# subset / rename columns for simpler joining
sc <- sc[, c('soilseriesname', 'taxclname', 'taxsubgrp')]
names(sc) <- c('series', 'family', 'subgroup')

# combine ST hierarchy + series by subgroup
# this will introduce family details
# left join (all.x = TRUE): every row of ST is kept, subgroups without a
# matching series get NA for `series` and `family`
# the key is named for both tables via `by`, instead of setting only `by.x`
# and relying on the default `by.y` (intersection of column names)
z <- merge(ST, sc, by = 'subgroup', all.x = TRUE, sort = FALSE)
head(z)

# normalization via lower case
z$family <- tolower(z$family)
z$series <- tolower(z$series)

# remove subgroup component of family spec
# the subgroup text differs by row, so the literal (fixed = TRUE) pattern is
# applied one row at a time
# seq_len() yields an empty index when `z` has no rows, 1:nrow(z) would
# yield c(1, 0)
# vapply() guarantees exactly one character value per row
# rows with NA family (left join, no series) remain NA
z$f <- vapply(
  seq_len(nrow(z)),
  function(i) {
    gsub(pattern = z$subgroup[i], replacement = '', x = z$family[i], fixed = TRUE)
  },
  FUN.VALUE = NA_character_
)

# remove white space
z$f <- trimws(z$f, which = 'both')

# ok
head(z)


# ordering used by the 'Keys': sort rows by taxon code
# radix method collates character vectors in the C locale
z <- z[order(z$code, method = 'radix'), , drop = FALSE]


## hack #1: use directory listing for a compact representation
# note: order isn't correct

# work inside the session temp dir
td <- tempdir()

# start clean: remove any 'ST' directory tree left over from a previous run
unlink(x = file.path(td, 'ST'), recursive = TRUE)

Expand Down Expand Up @@ -70,16 +79,20 @@ for(i in 1:nrow(z)) {
dir.create(path = fp, recursive = TRUE)
}


# dump output to text file
# the listing is requested via the relative path 'ST', so work from the temp dir
setwd(td)

# divert console output to the text file
# the diversion is removed via `finally`, so an error in dir_tree() cannot
# leave all later console output redirected into the file
# invisible(): nothing is auto-printed to the console after the sink is closed
sink('e:/temp/st12.txt')
invisible(tryCatch(dir_tree(file.path('ST')), finally = sink()))

# cleanup
unlink(file.path(td, 'ST'), recursive = TRUE)



## hack #2: use directory listing for a compact representation
## prefix with taxon codes for correct ordering
for(i in 1:nrow(z)) {

# account for subgroups without series
Expand Down Expand Up @@ -110,25 +123,35 @@ for(i in 1:nrow(z)) {
dir.create(path = fp, recursive = TRUE)
}

# save output to text file
# the listing is requested via the relative path 'ST', so work from the temp dir
setwd(td)

# divert console output to the text file
# the diversion is removed via `finally`, so an error in dir_tree() cannot
# leave all later console output redirected into the file
# invisible(): nothing is auto-printed to the console after the sink is closed
sink('e:/temp/st12-codes.txt')
invisible(tryCatch(dir_tree(file.path('ST')), finally = sink()))

# cleanup
unlink(file.path(td, 'ST'), recursive = TRUE)



## correct ordering via data.tree, as long as order of `z` is correct
# less compact, but doesn't require crazy file system manipulation

# one '/'-delimited path per row, from the 'ST' root down to the series:
# ST / order / suborder / greatgroup / subgroup / family (less subgroup) / series
# missing family / series are formatted as the literal text 'NA'
z$path <- sprintf(
  "ST/%s/%s/%s/%s/%s/%s",
  z[['order']], z[['suborder']], z[['greatgroup']],
  z[['subgroup']], z[['f']], z[['series']]
)

# required columns only, smaller data.tree
v <- c('order', 'suborder', 'greatgroup', 'subgroup', 'f', 'series', 'path')

# init data.tree object, hierarchy is parsed from the `path` column
n <- as.Node(z[v], pathName = 'path')

## missing family / series result in an ugly tree, prune accordingly

# prune missing family / series
pf <- function(i) {

# NA due to left join
# note odd approach required, matching to 'NA' vs. is.na()
if(GetAttribute(i, 'name') == 'NA') {
return(FALSE)
} else {
Expand All @@ -137,10 +160,13 @@ pf <- function(i) {

}

# dump to text file
# raise the print limit so that the full tree is written
# options() returns the previous settings, kept for restoration below
old_opts <- options(max.print = 1e7)

# divert console output to the text file
sink('e:/temp/st12-DT.txt')

# print the complete tree (no row limit), pruned by pf()
# `finally` runs whether or not printing fails:
#  * removes the diversion, so later console output is not redirected
#  * restores the previous `max.print`, rather than a hard-coded 1000
invisible(
  tryCatch(
    print(n, limit = NULL, pruneFun = pf),
    finally = {
      sink()
      options(old_opts)
    }
  )
)



0 comments on commit 6517954

Please sign in to comment.