# full_training.py (forked from georgesung/deep_rl_acrobot)
'''
Given the optimal parameter combination, run 1500 episodes of training from scratch,
followed by 8500 episodes of training with the learning rate reduced by a factor of 10.
More details are in the "Refinement" section of the report.
'''
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import random
import gym
import math
import matplotlib.pyplot as plt
import pickle
import learning_agent
# Set parameters in learning_agent
learning_agent.ACTOR_LR = 0.05
learning_agent.CRITIC_LR_SCALE = 0.5
learning_agent.REWARD_DISCOUNT = 0.97
learning_agent.A_REG_SCALE = 0.00005
learning_agent.C_REG_SCALE = 0.0005
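# (Assumption: CRITIC_LR_SCALE sets the critic's learning rate as a fraction of
#  ACTOR_LR, and A_REG_SCALE / C_REG_SCALE are the regularization weights for
#  the actor and critic networks, respectively.)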
# Enable saving the model to disk
learning_agent.SAVE_MODEL = True
###################################################################
# Phase 1: Run training over 1500 episodes, save the trained model
###################################################################
# Configure learning_agent to run 1500 training episodes, from scratch
learning_agent.NUM_EPISODES = 1500
learning_agent.RESUME = False
# Run RL algorithm until it does not return an error
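# (run_rl() is assumed to return (None, None) when training aborts with an
#  error, e.g. numerical divergence, in which case the phase is simply retried.)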
while True:
    avg_rewards1, score1 = learning_agent.run_rl()
    if avg_rewards1 is not None:
        break
print('Phase 1 complete, score: %f' % score1)
###################################################################
# Phase 2: Load model from Phase 1, reduce the learning rate by
# a factor of 10, run another 8500 training episodes
###################################################################
# Configure learning_agent appropriately
learning_agent.NUM_EPISODES = 8500
learning_agent.RESUME = True
learning_agent.ACTOR_LR /= 10
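# (If the critic's learning rate is tied to ACTOR_LR via CRITIC_LR_SCALE, it
#  drops by the same factor here.)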
# Run RL algorithm until it does not return an error
while True:
    avg_rewards2, score2 = learning_agent.run_rl()
    if avg_rewards2 is not None:
        break
print('Phase 2 complete, final score: %f' % score2)
print('Final model saved at %s' % learning_agent.MODEL_LOC)
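# Combine the per-episode average rewards from both phases into one learning curve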
avg_rewards = np.concatenate((avg_rewards1, avg_rewards2))
# Save the avg_rewards list just in case we need it later
# Maybe our plot was unclear, and we need to re-plot w/ same data
print('Saving avg_rewards to avg_rewards.p')
with open('avg_rewards.p', 'wb') as avg_rewards_out:
    pickle.dump(avg_rewards, avg_rewards_out)
print('Plotting avg rewards over episodes')
plt.plot(avg_rewards)
plt.title('Average Reward over Episodes')
plt.ylabel('Average Reward')
plt.xlabel('Episode')
plt.show()