Agent.py
import numpy as np

"""
m:         number of features per state, i.e. the length of the sliding
           window of price changes
batchSize: number of steps in a single trajectory
trajecNum: number of trajectories returned by get_trajectories()
"""
class Agent(object):
    def __init__(self, fileName, m, batchSize, trajecNum):
        self.action_space = [-1, 0, 1]  # short, hold, long
        self.m = m
        self.batchSize = batchSize
        self.trajecNum = trajecNum
        self.state = []
        # The data file holds one comma-separated line of prices; the trailing
        # comma leaves an empty last field, hence the pop.
        with open(fileName, 'r') as f:
            self.dataBase = f.readline().split(',')
        self.dataBase.pop()
        self.dataBase = [float(x) for x in self.dataBase]
        # Convert prices to one-step price changes. Iterating in reverse keeps
        # each difference based on the original prices; a forward pass would
        # subtract already-differenced values.
        for i in range(len(self.dataBase) - 1, 0, -1):
            self.dataBase[i] = self.dataBase[i] - self.dataBase[i - 1]
        # Build sliding windows of the last m price changes. Starting at index
        # m keeps every window full and skips dataBase[0], which is still a
        # raw price rather than a change, so no further trimming is needed.
        for i in range(self.m, len(self.dataBase)):
            self.state.append(self.dataBase[i - self.m + 1:i + 1])
    def choose_action(self, state):
        # To be implemented by a concrete policy. Must return one action index
        # in {0, 1, 2} per step of the trajectory (get_trajectory maps these
        # onto action_space by subtracting 1), e.g.
        # np.random.randint(0, 3, size=len(state))
        pass
    def get_trajectory(self):
        # Sample a random contiguous window of batchSize states.
        index = np.random.randint(0, len(self.state) - self.batchSize + 1)
        state = self.state[index:index + self.batchSize]
        # choose_action returns one index per step; subtracting 1 maps
        # {0, 1, 2} onto the positions {-1, 0, 1} of action_space.
        action = self.choose_action(state) - 1
        rewards = [float(0)]
        for i in range(1, self.batchSize):
            # Profit from holding the previous position over the latest price
            # change, minus a unit transaction cost for changing position.
            rew = action[i - 1] * state[i][-1] - abs(action[i] - action[i - 1])
            rewards.append(rew)
        return {"reward": rewards,
                "state": state,
                "action": action}
    def get_trajectories(self):
        # Collect trajecNum independently sampled trajectories.
        return [self.get_trajectory() for _ in range(self.trajecNum)]
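
# A minimal usage sketch (not part of the original file): RandomAgent and the
# file name "prices.txt" are hypothetical. RandomAgent stands in for a real
# policy by picking uniform-random action indices, one per step.
if __name__ == "__main__":
    class RandomAgent(Agent):
        def choose_action(self, state):
            # One action index in {0, 1, 2} per step of the trajectory.
            return np.random.randint(0, 3, size=len(state))

    agent = RandomAgent("prices.txt", m=5, batchSize=10, trajecNum=2)
    for traj in agent.get_trajectories():
        print(traj["action"], traj["reward"])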