# ppo_wandb.py
'''PPO with a very sparse reward.

Reward specification:
1. Edge reached              - (r=1, done=True)
2. Object fell on the ground - (r=-0.01, done=True)
3. All other cases           - (r=-0.01, done=False)
'''
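
# A minimal sketch (an assumption, not the actual pushGymEnv code) of how
# step() could realise the reward spec above; _edge_reached() and
# _object_on_ground() are hypothetical helpers:
#
# def _compute_reward(self):
#     if self._edge_reached():        # success: sparse terminal reward
#         return 1.0, True
#     if self._object_on_ground():    # failure: penalty, episode ends
#         return -0.01, True
#     return -0.01, False             # small per-step penalty otherwise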
# Add the parent directory to the path so the package can be found.
# Only needed when running from a source checkout; a pip install doesn't need it.
import os, sys, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
sys.path.insert(0, parentdir)
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv, VecVideoRecorder
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
import wandb
from wandb.integration.sb3 import WandbCallback
from pushGymEnv import pushGymEnv
def main():
    model_name = 'test_model0'
    config = {
        "policy_type": "MlpPolicy",
        "total_timesteps": 25000,
        "env_name": "PyBullet_Custom"
    }
    run = wandb.init(
        project='sb3',
        config=config,
        sync_tensorboard=True,  # auto-upload SB3's tensorboard metrics
        monitor_gym=True,       # auto-upload videos of the agent in the env
        save_code=True          # save the code of this run to wandb
    )
    def make_env():
        env = pushGymEnv()
        env = Monitor(env)  # record episode reward/length for logging
        return env

    env = DummyVecEnv([make_env])
    # env = VecVideoRecorder(env, f"./videos/{run.id}", record_video_trigger=lambda x: x % 5 == 0)
    # env = VecNormalize(env, norm_obs=True, norm_reward=True)  # , clip_obs=10.)
    # check_env inspects a custom environment and prints additional warnings if
    # needed; it expects a plain (unwrapped) gym.Env, not the vectorised env:
    # check_env(make_env())
    model = PPO(config['policy_type'], env, verbose=1, tensorboard_log=f"./runs/{run.id}")
    model.learn(
        total_timesteps=config['total_timesteps'],
        progress_bar=True,
        callback=WandbCallback(
            gradient_save_freq=1,
            model_save_path=f"./models/{run.id}",
            verbose=2
        )
    )  # total number of env steps = 25000
    print("----------------------------Training Complete----------------------------")
    run.finish()
    # model.save(os.path.join(currentdir, 'Results/New_Rewards', model_name))
    # stats_path = os.path.join(currentdir, 'Results/New_Rewards', model_name + "vec_normalize.pkl")
    # env.save(stats_path)
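
    # Reloading for evaluation would pair the saved model with the saved
    # normalisation statistics (a sketch assuming the commented save lines
    # above are enabled; stats_path and model_name as defined there):
    #
    # eval_env = DummyVecEnv([make_env])
    # eval_env = VecNormalize.load(stats_path, eval_env)
    # eval_env.training = False     # freeze running obs/reward statistics
    # eval_env.norm_reward = False  # report raw rewards during evaluation
    # model = PPO.load(os.path.join(currentdir, 'Results/New_Rewards', model_name), env=eval_env)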


if __name__ == "__main__":
    main()
# Reference snippet (SB3 quickstart, CartPole) showing save/load and a rollout loop:
# env = make_vec_env("CartPole-v1", n_envs=1)
# model = PPO("MlpPolicy", env, verbose=1)
# model.learn(total_timesteps=25000)
# model.save("ppo_cartpole")
# del model
# model = PPO.load("ppo_cartpole")
# obs = env.reset()
# while True:
# action, _states = model.predict(obs)
# obs, rewards, dones, info = env.step(action)
# env.render()
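#
# The same rollout pattern adapted to the custom env (a hypothetical sketch,
# reusing the loaded model and the eval_env idea from main above):
# eval_env = DummyVecEnv([lambda: Monitor(pushGymEnv())])
# obs = eval_env.reset()
# for _ in range(1000):
#     action, _states = model.predict(obs, deterministic=True)
#     obs, rewards, dones, info = eval_env.step(action)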