-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
56 lines (45 loc) · 2.33 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
import torch
import torch.nn as nn
import logging
from model import ActorNet, CriticNet
from replay_memory import NStepBackup, PrioritizedReplayBuffer
# Source flags appended as the last element of every experience tuple stored
# in the replay buffer (see DDPGfDAgent.add_n_step_experience).
DATA_DEMO = 0  # presumably: transition came from demonstration data
DATA_RUNTIME = 1  # presumably: transition was collected while the agent ran


class DDPGfDAgent(nn.Module):
    """Agent bundling two actor/critic network pairs with its replay machinery.

    The instance owns:

    * ``actor_b`` / ``critic_b`` and ``actor_t`` / ``critic_t`` -- two
      independently constructed copies of each network (presumably the
      trained networks and their targets; ``update_target`` is what moves
      weights from a source network into a target network).
    * ``backup`` -- an ``NStepBackup`` built from ``conf.gamma`` and
      ``conf.N_step``.
    * ``memory`` -- a ``PrioritizedReplayBuffer`` that receives experiences
      drained from ``backup``.
    * ``rs`` -- a ``numpy.random.RandomState`` seeded with ``conf.seed``.
    """

    def __init__(self, conf, device):
        """Build the networks, RNG, N-step backup and replay buffer.

        Args:
            conf: Configuration object. This class reads ``state_dim``,
                ``action_dim``, ``seed``, ``gamma``, ``N_step`` and
                ``replay_buffer_size`` here, and ``tau``, ``discrete_update``
                and ``discrete_update_eps`` in ``update_target``.
            device: Device handle forwarded unchanged to every network
                constructor and kept on ``self.device``.
        """
        super().__init__()
        self.conf = conf
        self.device = device
        self.logger = logging.getLogger('DDPGfD')

        # NOTE(review): the *_t networks are constructed independently and
        # are not synchronised with the *_b networks here; if they must start
        # from identical weights the caller has to arrange that -- confirm.
        self.actor_b = ActorNet(self.conf.state_dim, self.conf.action_dim, self.device)
        self.actor_t = ActorNet(self.conf.state_dim, self.conf.action_dim, self.device)
        self.critic_b = CriticNet(self.conf.state_dim, self.conf.action_dim, self.device)
        self.critic_t = CriticNet(self.conf.state_dim, self.conf.action_dim, self.device)

        self.rs = np.random.RandomState(self.conf.seed)

        self.backup = NStepBackup(self.conf.gamma, self.conf.N_step)
        self.memory = PrioritizedReplayBuffer(self.conf.replay_buffer_size, self.conf.seed, alpha=0.3,
                                              beta_init=1.0, beta_inc_n=100)

    def episode_reset(self):
        """Reset the N-step backup (intended to be called between episodes)."""
        self.backup.reset()

    def obs2tensor(self, state: np.ndarray) -> torch.Tensor:
        """Convert a NumPy observation into a float32 tensor.

        The result is not moved to ``self.device``; callers that need it on
        another device must transfer it themselves.
        """
        return torch.from_numpy(state).float()

    def update_target(self, src, tgt, episode=-1):
        """Move the weights of ``src`` into ``tgt``.

        Two modes are supported:

        * Soft update -- used when ``conf.discrete_update`` is falsy or when
          ``episode`` is left at ``-1``. Every target parameter becomes
          ``tau * src + (1 - tau) * tgt`` with ``tau = conf.tau``.
        * Discrete update -- used otherwise. The source parameters are copied
          verbatim, but only when ``episode`` is a multiple of
          ``conf.discrete_update_eps``; on any other episode the call does
          nothing.

        Only ``parameters()`` are visited, so module buffers are left as
        they are.

        Args:
            src: Module whose parameters are read.
            tgt: Module whose parameters are overwritten in place.
            episode: Current episode index, or ``-1`` to force a soft update.
        """
        soft = not self.conf.discrete_update or episode == -1
        if soft:
            tau = self.conf.tau
            # In-place writes under no_grad() keep the update out of the
            # autograd graph without reaching through `.data`, which would
            # hide the modification from autograd's version tracking.
            with torch.no_grad():
                for src_param, tgt_param in zip(src.parameters(), tgt.parameters()):
                    tgt_param.copy_(tau * src_param + (1.0 - tau) * tgt_param)
            self.logger.debug('(Soft)Update target network')
        elif episode % self.conf.discrete_update_eps == 0:
            with torch.no_grad():
                for src_param, tgt_param in zip(src.parameters(), tgt.parameters()):
                    tgt_param.copy_(src_param)
            self.logger.info('(Discrete)Update target network,episode=%s', episode)

    def add_n_step_experience(self, data_flag=DATA_RUNTIME, done=False):
        """Drain ready experiences from the N-step backup into the replay buffer.

        While ``backup.available(done)`` is true, one experience is popped
        and, if the pop reports success, stored as ``(*exp, data_flag)``.
        Unsuccessful pops are skipped, so termination relies on ``available``
        eventually turning false.

        Args:
            data_flag: Source tag appended to each stored tuple
                (``DATA_DEMO`` or ``DATA_RUNTIME``).
            done: Forwarded to the backup's ``available`` and ``pop_exp``
                calls; when true a debug line is logged once draining ends.
        """
        while self.backup.available(done):
            success, exp = self.backup.pop_exp(done)
            if success:
                self.memory.add((*exp, data_flag))
        if done:
            self.logger.debug('Done: All experience added')