trainer.py
#!/usr/bin/env python
# coding=utf-8
'''
Author: JiangJi
Email: [email protected]
Date: 2022-12-24 20:50:16
LastEditor: JiangJi
LastEditTime: 2022-12-24 20:50:16
Description:
'''
import torch
class Trainer:
    def __init__(self) -> None:
        pass

    def train_one_episode(self, env, agent, cfg):
        ep_reward = 0  # accumulated reward within the episode
        ep_step = 0
        state = env.reset()  # reset the env and obtain the initial state
        for _ in range(cfg.max_steps):
            sum_reward = 0  # accumulated n-step reward
            for j in range(cfg.n_step):
                action = agent.sample_action(state)  # sample an action
                next_state, reward, terminated, truncated, info = env.step(action)  # step the env under the new_step_api of OpenAI Gym
                sum_reward += reward
                ep_step += 1
                if j == 0:
                    init_state, init_action = state, action  # first state/action of the n-step window
                state = next_state  # advance the state within the n-step window
                if truncated or terminated or ep_step >= cfg.max_steps:
                    break
            # calculate the TD error used as the priority in PER DQN
            policy_val = agent.policy_net(torch.tensor(init_state, device=cfg.device))[0][init_action]
            target_val = agent.target_net(torch.tensor(next_state, device=cfg.device))[0]
            if terminated:
                error = abs(policy_val - sum_reward)
            else:
                error = abs(policy_val - sum_reward - cfg.gamma * torch.max(target_val))
            agent.memory.push(error.cpu().detach().numpy(), (init_state, init_action, sum_reward,
                                                             next_state, terminated))  # store the n-step transition
            agent.update()  # update the agent
            ep_reward += sum_reward
            if terminated or truncated or ep_step >= cfg.max_steps:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res
    def test_one_episode(self, env, agent, cfg):
        ep_reward = 0  # accumulated reward within the episode
        ep_step = 0
        state = env.reset()  # reset the env and obtain the initial state
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.predict_action(state)  # predict the greedy action (no exploration)
            next_state, reward, terminated, truncated, info = env.step(action)  # step the env under the new_step_api of OpenAI Gym
            state = next_state  # advance the state
            ep_reward += reward
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res
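
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file, kept commented out).
# It assumes a hypothetical `Agent` class exposing the interface used above
# (sample_action, predict_action, policy_net, target_net, memory.push, update)
# and a simple config object carrying the fields the trainer reads
# (max_steps, n_step, gamma, device). All names below are illustrative only.
#
# import gym
#
# class Config:
#     max_steps = 200   # max env steps per episode
#     n_step = 3        # length of the n-step return window
#     gamma = 0.99      # discount factor
#     device = 'cpu'    # torch device used for the priority computation
#
# if __name__ == '__main__':
#     cfg = Config()
#     env = gym.make('CartPole-v1', new_step_api=True)  # 5-tuple step API
#     agent = Agent(cfg)  # hypothetical PER DQN agent
#     trainer = Trainer()
#     for episode in range(100):
#         agent, res = trainer.train_one_episode(env, agent, cfg)
#         print(f"episode {episode}: reward={res['ep_reward']}, steps={res['ep_step']}")
# ---------------------------------------------------------------------------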