-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path3DSI_run_zonalStats.py
236 lines (179 loc) · 9.15 KB
/
3DSI_run_zonalStats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
"""
Created 4/15/2020
Iterate through a list of stacks for a given stack/zone type combo,
and run Zonal Stats
Inputs:
stackType [SGM, LVIS, or GLiHT for now]
zoneType [ATL08?, GLAS for now]
6/5/2020
Adding parameter in 3DSI_zonalStats.py to update an output GDB or not
If not running in parallel, supply output GDB/GPKG
If running in parallel, do not supply output GDB
Will instead build list of output shapefiles after parallel is
done running here and update gdb using list
9/1/2021:
Adding region var that will be hardcoded at top of script
Editing some vars if region == 'EU'
Hardcoding 'run1' in lists, will need to change as we go - see #9/1/21*
9/14: Due to excessive ADAPT slowness, going to clip .gdb to selected Landsat
squares for each run. So, creating a run variable to use for lists/.gdb
and clipping before will hopefully speed up processing [in getVarsDict()]
See #*9/14
9/28: Making necessary changes for run2 - changing comment indicator to make things easier
See #*run now for hardcoded run change areas
"""
import os
import argparse
import platform
import time
from RasterStack import RasterStack
from FeatureClass import FeatureClass
# Skip stacks whose output .csv already exists unless this is True
overwrite = False

# Processing region: 'EU' or NA (default); toggles mainDir below and the
# path edits in getVarsDict()
region = 'EU' # or NA (default)

# Base output directory and the per-stack zonal stats script this driver calls
mainDir = '/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/ZonalStats'
runScript = '/home/mwooten3/code/HRSI/3DSI_zonalStats.py'

# EU outputs live in a subdirectory of the main dir
if region == 'EU':
    mainDir = os.path.join(mainDir, 'EU')

# Accepted command-line values (checked in unpackValidateArgs)
validStackTypes = ['SGM', 'LVIS', 'GLiHT', 'Landsat', 'Tandemx']
validZonalTypes = ['ATL08', 'GLAS']

""" Dirs as of 10/13/2020
SGM: /att/gpfsfs/briskfs01/ppl/pmontesa/userfs02/projects/3dsi/stacks/Out_SGM/
LVIS: /att/gpfsfs/briskfs01/ppl/pmontesa/userfs02/projects/3dsi/stacks/out_lvis/ # these were updated to 15m stacks but location is same
GLiHT: /att/gpfsfs/briskfs01/ppl/pmontesa/userfs02/projects/3dsi/stacks/out_gliht/
Landsat: /att/gpfsfs/briskfs01/ppl/pmontesa/userfs02/data/standage/boreal_na/tile_stacks
Tandemx: /att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/Tandemx_stacks
"""
def getVarsDict(stackType, zonalType):
    """
    Build the run-specific input/output paths for a zonal stats run.

    stackType: one of validStackTypes (e.g. 'SGM', 'LVIS', ...).
    zonalType: 'ATL08' or 'GLAS' (validated by unpackValidateArgs
        before this is called via main()).

    Returns a dict:
        {'inList': <stack-list .txt>, 'inZonal': <zonal .gdb>,
         'outCsv': <output .csv path>}
    """
    #*run - hardcoded run name; update when moving to the next run
    runName = 'run3'

    #*9/1 #*9/14 - EU runs are split, so the stack list file is run-specific
    inputList = os.path.join(mainDir, '_lists',
                             'ls_{}_EU-{}.txt'.format(stackType, runName))

    if zonalType == 'ATL08':
        #*9/14 #*run - .gdb clipped to this run's Landsat squares (to work
        # around ADAPT slowness); only change if no longer separating by run
        inputZonal = ('/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/'
                      'ATL08_eu__run_gdbs/{}/ATL08_na_v003.gdb'.format(runName))
    elif zonalType == 'GLAS':
        inputZonal = '/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/GLAS_naBoreal.gdb'
    else:
        # Unreachable via main(), which validates zonalType first; guards
        # against a NameError on inputZonal if called directly
        raise RuntimeError("Zonal type must be ATL08 or GLAS")

    #*9/14 #*run - output .csv (and node-specific .gdb files) go in a
    # run-specific directory
    outCsv = os.path.join(mainDir, '_zonalStatsGdb', runName,
                          '{}__{}__ZonalStats.csv'.format(zonalType, stackType))

    # Edit vars if region is EU: swap 'na' -> 'eu' in the zonal path and
    # tag the output .csv name with the region
    if region == 'EU':
        inputZonal = inputZonal.replace('na', 'eu')
        outCsv = outCsv.replace('{}__'.format(zonalType),
                                '{}-EU__'.format(zonalType))

    return {'inList': inputList, 'inZonal': inputZonal, 'outCsv': outCsv}
def getStackList(inList, stackRange):
    """
    Read the list of stack paths from a text file, optionally subsetting.

    inList: path to a text file with one stack path per line.
    stackRange: None/empty to run all stacks, or a 2-item sequence of
        1-based inclusive indices (e.g. ['1', '20'] as produced by
        unpackValidateArgs).

    Returns the (possibly subset) list of stack paths.
    Raises RuntimeError if stackRange cannot be parsed as two integers.
    """
    with open(inList, 'r') as f:
        stacks = f.read().splitlines()

    # If range is None, run all stacks
    if not stackRange:
        return stacks

    # Resolves the old TO-DO: reject a malformed range explicitly
    try:
        start, end = (int(i) for i in stackRange)
    except (ValueError, TypeError):
        raise RuntimeError("Range must be two integers like: 1-20")

    # Convert the 1-based inclusive range to a Python slice
    return stacks[start - 1:end]
# Unpack and validate input arguments
def unpackValidateArgs(args):
    """
    Pull the four inputs out of the argparse dict and validate them.

    args: dict with keys 'stackType', 'zonalType', 'range', 'parallel'.

    Returns (stackType, zonalType, stackRange, runPar) where stackRange
    is None or a 2-item list like ['1', '20'].
    Raises RuntimeError on any invalid input.
    """
    # Unpack args
    stackType = args['stackType']
    zonalType = args['zonalType']
    stackRange = args['range']
    runPar = args['parallel']

    # Validate inputs
    if stackType not in validStackTypes:
        raise RuntimeError("Stack type must be one of: {}".format(
                                           ", ".join(validStackTypes)))

    if zonalType not in validZonalTypes:
        raise RuntimeError("Zonal type must be one of: {}".format(
                                           ", ".join(validZonalTypes)))

    # Range arrives like '1-20'; check the shape here (the integer values
    # themselves are checked downstream in getStackList)
    if stackRange:
        stackRange = stackRange.split('-')
        if len(stackRange) != 2:
            raise RuntimeError("Range must be supplied like: 1-20")

    return stackType, zonalType, stackRange, runPar
def main(args):
    """
    Run the zonal stats script over every stack in the run's list,
    either sequentially or via GNU parallel.

    args: argparse dict (see unpackValidateArgs).

    Side effects: shells out to 3DSI_zonalStats.py per stack and, in
    parallel mode, appends each completed non-empty shapefile to a
    node-specific output .gpkg.
    """
    # Unpack and validate arguments
    stackType, zonalType, stackRange, runPar = unpackValidateArgs(args)

    # Get varsDict --> {inList; inZonal; outCsv}
    varsDict = getVarsDict(stackType, zonalType)

    # Get list of stacks to iterate
    stackList = getStackList(varsDict['inList'], stackRange)

    # Node-specific output .gpkg so nodes running in parallel never write
    # to the same file
    outGdb = varsDict['outCsv'].replace('.csv',
                                        '-{}.gpkg'.format(platform.node()))

    if runPar:  # If running in parallel

        # Expected output shp for each stack; build each RasterStack only
        # once per stack (the original built it twice)
        shps = []
        for stack in stackList:
            stackName = RasterStack(stack).stackName
            shps.append(os.path.join(mainDir, zonalType, stackType, stackName,
                        '{}__{}__zonalStats.shp'.format(zonalType, stackName)))

        # Job count from lscpu; NOTE(review): this reads the *socket* count,
        # though the original comment said "all CPUs minus 1" - confirm intent
        call = "lscpu | awk '/^Socket.s.:/ {sockets=$NF} END {print sockets}'"
        ncpu = int(os.popen(call).read().strip()) - 1

        parList = ' '.join(stackList)

        print("\nProcessing {} stack files in parallel...\n".format(len(stackList)))

        # Do not supply output GDB, just supply .csv; {1}/{2}/{3} are GNU
        # parallel placeholders filled from the ::: input lists below
        parCall = '{} -rs '.format(runScript) + '{1} -z {2} -o {3} -log'
        cmd = "parallel --progress -j {} --delay 1 '{}' ::: {} ::: {} ::: {}". \
                    format(ncpu, parCall, parList, varsDict['inZonal'],
                           varsDict['outCsv'])
        os.system(cmd)

        # And update node-specific GDB with every completed, non-empty shp
        print("\n\nCreating {} with completed shapefiles ({})...".format(outGdb,
                                        time.strftime("%m-%d-%y %I:%M:%S")))
        for shp in shps:
            if os.path.isfile(shp):
                fc = FeatureClass(shp)
                if fc.nFeatures > 0:
                    fc.addToFeatureClass(outGdb)

    else:  # Do not run in parallel

        # Iterate through stacks and call the script one at a time
        print("\nProcessing {} stacks...".format(len(stackList)))

        for c, stack in enumerate(stackList, 1):

            print("\n{}/{}:".format(c, len(stackList)))

            # Check stack's output csv, and skip if it exists and
            # overwrite is False
            rs = RasterStack(stack)
            check = os.path.join(mainDir, zonalType, stackType, rs.stackName,
                      '{}__{}__zonalStats.csv'.format(zonalType, rs.stackName))

            if not overwrite and os.path.isfile(check):
                print("\nOutputs for {} already exist\n".format(rs.stackName))
                continue

            # Not running in parallel: send the node-specific output .gdb
            # so both the .csv and .gdb get written
            cmd = 'python {} -rs {} -z {} -o {} -log'.format(runScript, stack,
                                               varsDict['inZonal'], outGdb)
            print(cmd)
            os.system(cmd)
if __name__ == "__main__":

    # Command-line interface: two positional type arguments plus an
    # optional 1-based stack range and a parallel flag
    parser = argparse.ArgumentParser()
    parser.add_argument("zonalType", type=str,
                        help="Zonal type (ATL08 or GLAS)")
    parser.add_argument("stackType", type=str,
                        help="Stack type (SGM, LVIS, GLiHT, Landsat, Tandemx)")
    parser.add_argument("-r", "--range", type=str,
                        help="Range for stack iteration (i.e. 1-20)")
    parser.add_argument("-par", "--parallel", action='store_true',
                        help="Run in parallel")

    # Pass the parsed args to main() as a plain dict
    args = vars(parser.parse_args())

    main(args)