-
Notifications
You must be signed in to change notification settings - Fork 12
/
train_rl.py
97 lines (88 loc) · 2.46 KB
/
train_rl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys

# Drop the ROS Kinetic Python 2.7 dist-packages directory from the import
# search path, if present, before the remaining imports below are resolved.
# NOTE(review): presumably this keeps ROS's Python 2 builds of shared packages
# from shadowing the Python 3 ones -- confirm which import it protects.
if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path:
    sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
import argparse
import random
import numpy as np
from configs.default import get_config
from trainer.rl.ppo.ppo_trainer_memory import PPOTrainer_Memory
#from trainer.rl import ppo
import env_utils
import os
# Logging-related environment variables, set at import time for this process
# and any child processes it spawns.
# NOTE(review): presumably these quiet glog / Magnum output from the simulator
# backend -- confirm they are still read this late, after the imports above.
os.environ['GLOG_minloglevel'] = "2"
os.environ['MAGNUM_LOG'] = "quiet"
# Command-line interface of the training script.  Flag names, types, defaults
# and choices are part of the script's public interface and must stay stable.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--config",
    type=str,
    required=True,
    help="path to config yaml containing info about experiment",
)
parser.add_argument(
    "--version",
    type=str,
    required=True,
    help="version of the training experiment",
)
parser.add_argument(
    "--gpu",
    type=str,
    default="0",
    help="gpus",
)
parser.add_argument(
    "--stop",
    action='store_true',
    default=False,
    help="include stop action or not",
)
# The flag is negative: passing it turns noisy actuation OFF.  The help text
# says so explicitly instead of the ambiguous "include noise or not".
parser.add_argument(
    "--no-noise",
    action='store_true',
    default=False,
    help="disable noisy actuation (noise is enabled unless this flag is given)",
)
parser.add_argument(
    "--diff",
    default='hard',
    choices=['easy', 'medium', 'hard'],
    help="episode difficulty",
)
# Kept as a string with the "none" sentinel because the consumer compares
# against 'none' before converting the value with int().
parser.add_argument(
    "--seed",
    type=str,
    default="none",
    help="integer random seed; the default 'none' keeps the seed from the config",
)
parser.add_argument(
    "--render",
    action='store_true',
    default=False,
    help="This will save the episode videos, periodically",
)
# Parse the command line once, at import time; main() and run_exp() read the
# resulting namespace through this module-level name.
arguments = parser.parse_args()
# Restrict the GPUs visible to CUDA to the ones requested via --gpu.
# NOTE(review): this only has an effect if CUDA has not been initialised yet --
# confirm that none of the imports above initialise it.
os.environ["CUDA_VISIBLE_DEVICES"] = arguments.gpu
def main():
    """Launch the experiment with the parsed command-line options.

    Every attribute of the module-level ``arguments`` namespace is forwarded
    to ``run_exp`` as a keyword argument.
    """
    cli_options = vars(arguments)
    run_exp(**cli_options)
def run_exp(config: str, opts=None, *args, **kwargs) -> None:
    """Build the experiment config, seed the RNGs and run PPO training.

    Args:
        config: Path handed to ``get_config`` together with the ``--version``
            value to load the experiment configuration.
        opts: Accepted but not used by this function.
        *args: Accepted but not used by this function.
        **kwargs: Accepted but not used by this function; every other setting
            is read from the module-level ``arguments`` namespace.
    """
    cfg = get_config(config, arguments.version)

    # Apply the command-line overrides while the config is unlocked.
    cfg.defrost()
    cfg.noisy_actuation = not arguments.no_noise
    cfg.DIFFICULTY = arguments.diff
    cfg.render = arguments.render

    # "STOP" is placed first in the action list only when --stop was given.
    movement_actions = ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"]
    cfg.TASK_CONFIG.TASK.POSSIBLE_ACTIONS = (
        ["STOP"] + movement_actions if arguments.stop else movement_actions
    )

    # 'none' is the sentinel for "keep the seed that came with the config".
    if arguments.seed != 'none':
        cfg.TASK_CONFIG.SEED = int(arguments.seed)
    cfg.freeze()

    # Seed both the stdlib and the NumPy generators with the final seed.
    seed = cfg.TASK_CONFIG.SEED
    random.seed(seed)
    np.random.seed(seed)

    PPOTrainer_Memory(cfg).train()
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()