test_lab2_problem3.py
# Copyright [2020] [KTH Royal Institute of Technology] Licensed under the
# Educational Community License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may
# obtain a copy of the License at http://www.osedu.org/licenses/ECL-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
#
# Course: EL2805 - Reinforcement Learning - Lab 2 Problem 3
# Code author: [Alessio Russo - [email protected]]
# Last update: 6th October 2020, by [email protected]
#
# Modified by: [Franco Ruggeri - [email protected]]

import unittest
import numpy as np
import gym
import torch
from pathlib import Path

from el2805.agents.rl import PPO
from el2805.agents.rl.utils import get_device
from tests.utils import test


class PPOTestCase(unittest.TestCase):
    seed = 1
    environment = None
    rng = None

    def setUp(self):
        self.environment = gym.make('LunarLanderContinuous-v2')
        self.environment.seed(self.seed)
        self.rng = np.random.RandomState(self.seed)

    def tearDown(self):
        self.environment.close()

    def test_training(self):
        # Hyper-parameters
        n_episodes = 1600
        discount = .99
        n_epochs_per_update = 10
        epsilon = .2
        critic_learning_rate = 1e-3
        critic_hidden_layer_sizes = [400, 200]
        critic_hidden_layer_activation = "relu"
        actor_learning_rate = 1e-5
        actor_shared_hidden_layer_sizes = [400]
        actor_mean_hidden_layer_sizes = [200]
        actor_var_hidden_layer_sizes = [200]
        actor_hidden_layer_activation = "relu"
        gradient_max_norm = 1
        early_stopping_reward = 250
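        # Note: `epsilon` is presumably the clipping range of the PPO
        # surrogate objective (probability ratios clipped to
        # [1 - epsilon, 1 + epsilon]); 0.2 matches the value commonly used
        # in the PPO paper and reference implementations.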

        # Agent
        agent = PPO(
            environment=self.environment,
            discount=discount,
            n_epochs_per_step=n_epochs_per_update,
            critic_learning_rate=critic_learning_rate,
            critic_hidden_layer_sizes=critic_hidden_layer_sizes,
            critic_hidden_layer_activation=critic_hidden_layer_activation,
            actor_learning_rate=actor_learning_rate,
            actor_shared_hidden_layer_sizes=actor_shared_hidden_layer_sizes,
            actor_mean_hidden_layer_sizes=actor_mean_hidden_layer_sizes,
            actor_var_hidden_layer_sizes=actor_var_hidden_layer_sizes,
            actor_hidden_layer_activation=actor_hidden_layer_activation,
            epsilon=epsilon,
            gradient_max_norm=gradient_max_norm,
            device=get_device(),
            seed=self.seed
        )
        agent.train(n_episodes=n_episodes, early_stop_reward=early_stopping_reward)

        def compute_action(state):
            action = agent.compute_action(state, explore=True)
            return action

        test(self, self.environment, compute_action)
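
    # A minimal sketch of how the actor checkpoint consumed by
    # test_saved_model could have been produced after training; the
    # attribute name `agent.actor` is an assumption about the PPO class,
    # so it is left commented out:
    #
    #   torch.save(agent.actor, "neural-network-3-actor.pth")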

    def test_saved_model(self):
        model_path = \
            Path(__file__).parent.parent / "results" / "lab2" / "problem3" / "task_c" / "neural-network-3-actor.pth"
        model = torch.load(model_path)  # full actor module saved with torch.save(model), not a state_dict

        def compute_action(state):
            with torch.no_grad():
                # Add a batch dimension and convert the state to a tensor
                state = torch.as_tensor(
                    data=state.reshape((1,) + state.shape),
                    dtype=torch.float64
                )
                # The actor outputs the mean and variance of a Gaussian
                # policy; sample an action from that distribution
                mean, var = model(state)
                mean, var = mean.reshape(-1), var.reshape(-1)
                action = torch.normal(mean, torch.sqrt(var))
                action = action.numpy()
            return action

        test(self, self.environment, compute_action)


if __name__ == '__main__':
    unittest.main()
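
# To run this test suite from the repository root (assuming the `tests`
# package layout implied by the imports above):
#
#   python -m unittest tests.test_lab2_problem3
#
# A single test can also be selected, e.g.:
#
#   python -m unittest tests.test_lab2_problem3.PPOTestCase.test_saved_model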