-
Notifications
You must be signed in to change notification settings - Fork 2
/
make-local-tabular-DB.R
128 lines (81 loc) · 2.71 KB
/
make-local-tabular-DB.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
## Notes:
# if working with a huge directory of .zip files
# ~ 7 minutes on 4-1
# time parallel --eta unzip ::: *.zip
# A lot of free space is required for all of SSURGO
# FY24 SSURGO (uncompressed): 217GB
# Latest STATSGO (uncompressed): 1GB
# Final SSURGO + STATSGO tabular database: 14GB
## will fail on a slow connection; WSS / SDA are shut down most nights
library(soilDB)
library(RSQLite)
library(DBI)
## paths
# Two environments are supported; exactly one set of paths should be active.
# Previously both sets were assigned, so the soilweb (relative) paths were
# silently overwritten by the local-machine paths below. Keep the inactive
# set commented out so only one is live; swap the comments to run on soilweb.
# soilweb
# .exdir1 <- 'statsgo'
# .exdir2 <- 'ssurgo'
# .dbfile <- 'ssurgo-combined.sqlite'
# local machine
.exdir1 <- 'e:/temp/statsgo'                 # extracted STATSGO tabular data
.exdir2 <- 'e:/temp/ssurgo'                  # extracted SSURGO tabular data
.dbfile <- 'e:/temp/ssurgo-combined.sqlite'  # combined SQLite output file
## SSAs to iterate over
# Query Soil Data Access (network I/O) for every soil survey area symbol,
# excluding the national 'US' record (STATSGO, handled separately below)
# and 'MXNL001' -- NOTE(review): presumably excluded deliberately; confirm.
q <- "SELECT areasymbol, saverest FROM sacatalog WHERE areasymbol NOT IN ('US', 'MXNL001');"
x <- SDA_query(q)
# quick sanity check: number of survey areas to download
nrow(x)
## download
# notes:
# * all of SSURGO will never finish at home / Sonora MLRA office
# * must increase curl timeout on a slow connection (STATSGO will fail with defaults)
# * parallel downloads would be faster (1 hour on soilmap 2-1)
# STATSGO
# * gov machine, Sonora MLRA office: 13 minutes
# * soilmap 4-1: 11 minutes
# very generous curl timeout so large archives survive slow links
options('soilDB.timeout' = 1e6)
# STATSGO: the single national 'US' archive, extracted into .exdir1;
# zip files are removed after extraction (remove_zip = TRUE)
system.time(
downloadSSURGO(areasymbols = 'US', exdir = .exdir1, include_template = FALSE, remove_zip = TRUE, extract = TRUE, overwrite = TRUE, db = 'STATSGO')
)
# SSURGO
# one archive per survey area symbol returned by the sacatalog query above
system.time(
downloadSSURGO(areasymbols = x$areasymbol, exdir = .exdir2, include_template = FALSE, remove_zip = TRUE, extract = TRUE, overwrite = TRUE, db = 'SSURGO')
)
## create database
# fresh start, remove whatever was left from last time
unlink(.dbfile)
# first pass, STATSGO
# * gov machine: ~ 91 seconds
# * soilweb 4-1: ~ 71 seconds
# overwrite = TRUE: initialize a new SQLite file from the STATSGO tables;
# spatial data are skipped (tabular-only database)
system.time(
createSSURGO(filename = .dbfile, exdir = .exdir1, include_spatial = FALSE, overwrite = TRUE)
)
# second pass, SSURGO
# * gov machine: (not possible yet)
# * soilweb 4-1: 32 minutes
# overwrite = FALSE: append SSURGO rows into the database created above --
# ordering of the two passes matters
system.time(
createSSURGO(filename = .dbfile, exdir = .exdir2, include_spatial = FALSE, overwrite = FALSE)
)
## connect to finish up
db <- dbConnect(RSQLite::SQLite(), .dbfile)
# cleanup
# ~ 5 minutes
# reclaim free pages / defragment after the two-pass bulk load
dbExecute(db, 'VACUUM;')
# check indices
# NOTE(review): inspection-only queries -- results are printed when run
# interactively, not used downstream
dbGetQuery(db, 'PRAGMA index_list(mapunit);')
dbGetQuery(db, "select type, name, tbl_name, sql
FROM sqlite_master
WHERE type = 'index' AND tbl_name = 'chorizon' ;")
## TODO additional or specialized indexing?
## check
# list tables
dbListTables(db)
# STATSGO
# spot-check a known STATSGO map unit key
dbGetQuery(db, "SELECT mukey, muname, mukind FROM mapunit WHERE mukey = '658083' ;")
# SSURGO
# spot-check a known SSURGO map unit key
dbGetQuery(db, "SELECT mukey, muname, mukind FROM mapunit WHERE mukey = '2600481' ;")
# simple query
dbGetQuery(db, 'SELECT cokey, compname, comppct_r, majcompflag FROM component LIMIT 5;')
# be sure to close connection / file
dbDisconnect(db)
## cleanup
# clear the global environment and force garbage collection -- normally an
# anti-pattern, but intentional here at the end of a standalone script
rm(list = ls())
gc(reset = TRUE)