"""
Command-line training options.

Options include:

1. Retrain boolean ("--retrain")
   - If included, actor and critic neural network parameters are reset.
2. Testing boolean ("--test")
   - If included, the agent only uses its greedy policy without noise; no changes are made to the policy or neural networks.
   - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
3. Show boolean ("--show")
   - If included, training will be visualized.
4. Train Only boolean ("--train_only")
   - If included, the agent will be solely in training mode and will not interleave periods of training and testing.
5. Verbosity boolean ("--verbose")
   - If included, a summary of each transition will be printed.
6. All Trans boolean ("--all_trans")
   - If included, all transitions will be printed, including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions. Use the options below to print specific types of transitions.
7. Hindsight Action trans boolean ("--hind_action")
   - If included, prints hindsight action transitions for each level.
8. Subgoal Penalty trans ("--penalty")
   - If included, prints the subgoal penalty transitions.
9. Preliminary HER trans ("--prelim_HER")
   - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components).
10. HER trans ("--HER")
    - If included, prints the final HER transitions for each level.
11. Show Q-values ("--Q_values")
    - If included, shows Q-values for each action by each level.

Additional experimental options (reward variants, oracle modes, algorithm
choices, and VPN/attention settings) are defined in parse_options() below.
"""
import argparse
def parse_options():
parser = argparse.ArgumentParser()
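# Mode flags: reset the policy, freeze it for testing, visualize training, or train without evaluation.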
parser.add_argument(
'--retrain',
action='store_true',
help='Include to reset policy'
)
parser.add_argument(
'--test',
action='store_true',
help='Include to fix current policy'
)
parser.add_argument(
'--show',
action='store_true',
help='Include to visualize training'
)
parser.add_argument(
'--train_only',
action='store_true',
help='Include to use training mode only'
)
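# Debug printing: per-transition summaries, HER and penalty transitions, and Q-values (see module docstring).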
parser.add_argument(
'--verbose',
action='store_true',
help='Print summary of each transition'
)
parser.add_argument(
'--all_trans',
action='store_true',
help='Print all transition types: hindsight action, subgoal penalty, preliminary HER, and final HER'
)
parser.add_argument(
'--hind_action',
action='store_true',
help='Print hindsight action transitions for each level'
)
parser.add_argument(
'--penalty',
action='store_true',
help='Print subgoal penalty transitions'
)
parser.add_argument(
'--prelim_HER',
action='store_true',
help='Print preliminary HER transitions (with TBD reward and goal components)'
)
parser.add_argument(
'--HER',
action='store_true',
help='Print final HER transitions for each level'
)
parser.add_argument(
'--Q_values',
action='store_true',
help='Show Q-values for each action by each level'
)
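# Target-network usage and reward variants for the lowest layer.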
parser.add_argument(
'--no_target_net',
action='store_true',
help='Does not use target networks.'
)
parser.add_argument(
'--negative_distance',
action='store_true',
help='The lowest layer uses negative distance instead of sparse reward.'
)
parser.add_argument(
'--dense_reward',
action='store_true',
help='The lowest layer uses dense reward and negative distance instead of sparse reward.'
)
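# Experiment bookkeeping: experiment/variant names, experiment number, and random seed.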
parser.add_argument(
'--exp_name',
type=str,
default="",
help='Experiment name.'
)
parser.add_argument(
'--variant',
type=str,
default="",
help='Variant name.'
)
parser.add_argument(
'--exp_num',
type=int,
default=0,
help='Experiment number.'
)
parser.add_argument(
'--seed',
type=int,
default=0,
help='Random seed.'
)
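# Algorithm and architecture variants: critic ensemble size, oracle modes, SAC/TD3, framework, and optimizer.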
parser.add_argument(
'--num_Qs',
type=int,
default=2,
help='Number of critic networks.'
)
parser.add_argument(
'--oracle',
action='store_true',
help='Use oracle instead of the first layer.'
)
parser.add_argument(
'--bayes',
action='store_true',
help='Use Bayesian critic instead of the normal one.'
)
parser.add_argument(
'--actor_grads',
action='store_true',
help='Teach higher-level policies to take the lower-level policy into account.'
)
parser.add_argument(
'--orig_trans',
action='store_true',
help='Store original transition.'
)
parser.add_argument(
'--normalization',
action='store_true',
help='Normalizes the observations from the environment.'
)
parser.add_argument(
'--relative_subgoals',
action='store_true',
help='Instead of absolute goals use relative subgoals.'
)
parser.add_argument(
'--no_middle_level',
action='store_true',
help='No middle level.'
)
parser.add_argument(
'--mask_global_info',
action='store_true',
help='Mask unnecessary observations for the middle layer.'
)
parser.add_argument(
'--priority_replay',
action='store_true',
help='Uses priority replay instead of a standard replay buffer.'
)
parser.add_argument(
'--new_oracle',
action='store_true',
help='Oracle constant distance.'
)
parser.add_argument(
'--sl_oracle',
action='store_true',
help='Policy supervised by the oracle.'
)
parser.add_argument(
'--semi_oracle',
action='store_true',
help='Policy semi-supervised by the oracle.'
)
parser.add_argument(
'--sac',
action='store_true',
help='Uses Soft Actor-Critic (SAC) instead of DDPG.'
)
parser.add_argument(
'--td3',
action='store_true',
help='Uses TD3 instead of DDPG.'
)
parser.add_argument(
'--radius_learner',
action='store_true',
help='Learn radius.'
)
parser.add_argument(
'--torch',
action='store_true',
help='Use PyTorch instead of TensorFlow.'
)
parser.add_argument(
'--radam',
action='store_true',
help='Use RAdam (Rectified Adam) instead of Adam.'
)
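# Value Propagation Network (VPN) options.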
parser.add_argument(
'--vpn',
action='store_true',
help='Use Value Propagation Network.'
)
parser.add_argument(
'--no_vpn_weights',
action='store_true',
help='Use a deterministic VPN module.'
)
parser.add_argument(
"--save_video",
action='store_true',
help="Saves video. Can't be used with show at the same time."
)
parser.add_argument(
"--featurize_image",
action='store_true',
help="Uses simple image of the environement instead of the rendered scene."
)
parser.add_argument(
"--always_penalize",
action='store_true',
help="On the highest level always penalize for not completed goal."
)
parser.add_argument(
"--Q_penalize",
action='store_true',
help="On the highest level always penalize when the lower level's Q function think the goal is reachable."
)
parser.add_argument(
"--boost_vpn",
action='store_true',
help="Q targets are computed as annealing average of standard Q value (FC) and VPN value."
)
parser.add_argument(
"--vpn_double_conv",
action='store_true',
help="VPN layer has double convolution."
)
parser.add_argument(
"--vpn_dqn",
action='store_true',
help="VPN's V function is multiplied by pooled wall probs."
)
parser.add_argument(
'--vpn_post_processing',
action='store_true',
help='Apply extra conv layer after the VPN propagation.'
)
parser.add_argument(
'--vpn_masking',
action='store_true',
help='Distill sigma from the images and apply an appropriate Gaussian mask.'
)
parser.add_argument(
'--vpn_cnn_masking',
action='store_true',
help='Let the convolution apply an appropriate Gaussian mask on its own.'
)
parser.add_argument(
'--vpn_cnn_masking_times',
action='store_true',
help='Let the convolution apply an appropriate Gaussian mask and multiply the original v_map.'
)
parser.add_argument(
'--vpn_direction_masking',
action='store_true',
help='Masking using the value flow direction.'
)
parser.add_argument(
'--covariance',
action='store_true',
help='Predict a full covariance matrix for the Gaussian attention mask.'
)
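# Attention options: Gaussian vs. fixed attention, learned sigma, and attention visualization.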
parser.add_argument(
"--gaussian_attention",
action='store_true',
help="Use Gaussian kernel instead of fixed attention."
)
parser.add_argument(
"--no_attention",
action='store_true',
help="Don't use attention."
)
parser.add_argument(
"--learn_sigma",
action='store_true',
help="Learn sigma using a Bandit."
)
parser.add_argument(
"--sigma_overlay",
action='store_true',
help="When saving video, show attention."
)
parser.add_argument(
"--noisy",
action='store_true',
help="Add noise to the V map before predicting covariance matrix."
)
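# Auxiliary losses and wall-probability handling.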
parser.add_argument(
"--reconstruction",
action='store_true',
help="Add reconstruction loss to the planning layer."
)
parser.add_argument(
"--wall_thresh",
action='store_true',
help="Threshold wall probabilities."
)
parser.add_argument(
"--high_penalty",
action='store_true',
help="Threshold wall probabilities."
)
parser.add_argument(
"--vpn_masking_act",
action='store_true',
help="Use gradient passing clipping instead of sigmoid for wall probs."
)
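# Attention window size, pretrained low-level agents, and the RRT flag.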
parser.add_argument(
"--window_offset",
type=int,
default=2,
help="Size of the attention window (2*offset+1) when fixed window attention is used."
)
parser.add_argument(
"--humanoid",
action='store_true',
help="Use pretrained humanoid as the low level agent."
)
parser.add_argument(
"--inject_her_policy",
action='store_true',
help="Use pretrained openai her policy."
)
parser.add_argument(
"--rrt",
action='store_true',
)
FLAGS, unparsed = parser.parse_known_args()
return FLAGS
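

# Minimal usage sketch: parse the flags and print them, so that e.g.
# `python options.py --test --show` can be used to sanity-check flag parsing.
if __name__ == '__main__':
    FLAGS = parse_options()
    print(vars(FLAGS))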