-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
57 lines (43 loc) · 1.87 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from itertools import accumulate

import numpy as np
def safe_softmax(choices):
    """Return the softmax of ``choices``.

    The maximum value is subtracted before exponentiating, so the largest
    exponent is ``exp(0) == 1`` and large inputs cannot overflow.

    Args:
        choices: Array-like of numeric values (a NumPy array or anything
            ``np.asarray`` can convert, such as a list).

    Returns:
        NumPy array with the same shape as ``choices``; the normalisation
        runs over all elements.

    Raises:
        ValueError: If ``choices`` is empty (the maximum is undefined).
    """
    values = np.asarray(choices)
    # Compute the shifted exponentials once and reuse them for the
    # numerator and the normalising sum.
    weights = np.exp(values - values.max())
    return weights / weights.sum()
def tempered_softmax(choices, temperature):
    """Return the softmax of ``choices`` scaled by ``1 / temperature``.

    Args:
        choices: Array-like of numeric values (a NumPy array or anything
            ``np.asarray`` can convert, such as a list).
        temperature: Divisor applied to every value before the softmax.
            Must be non-zero.

    Returns:
        NumPy array with the same shape as ``choices``; the normalisation
        runs over all elements. An empty input yields an empty array.
    """
    scaled = np.asarray(choices) / temperature
    if scaled.size == 0:
        # Nothing to normalise; mirror the empty result of a plain softmax.
        return scaled
    # Shifting by the maximum leaves the softmax unchanged mathematically
    # but keeps every exponent <= 0, so exp() cannot overflow to inf and
    # produce NaN probabilities. No manual cap on the inputs is needed.
    weights = np.exp(scaled - scaled.max())
    return weights / weights.sum()
def location_counter(state_list, domain):
    """Count how often each grid cell occurs in ``state_list``.

    Args:
        state_list: List whose entries are compared against ``(i, j)``
            index tuples via ``list.count``.
        domain: Pair ``(x, y)`` giving the grid dimensions.

    Returns:
        Float NumPy array of shape ``domain`` where entry ``[i, j]`` is the
        number of occurrences of ``(i, j)`` in ``state_list``.
    """
    visits = np.zeros(domain)
    n_rows, n_cols = domain
    # ndindex walks every (i, j) pair in row-major order.
    for cell in np.ndindex(n_rows, n_cols):
        visits[cell] += state_list.count(cell)
    return visits
def action_counter(action_list, action_space):
    """Count how often each action index occurs in ``action_list``.

    Args:
        action_list: List of actions; entries are matched against the
            integer indices ``0 .. action_space[0] - 1`` via ``list.count``.
        action_space: Shape of the result array; its first entry is the
            number of action indices that are tallied.

    Returns:
        Float NumPy array of shape ``action_space`` holding the tallies.
    """
    tallies = np.zeros(action_space)
    n_actions = action_space[0]
    for action in range(n_actions):
        occurrences = action_list.count(action)
        tallies[action] += occurrences
    return tallies
def state_action_counter(state_action_list, state_action_space):
    """Count how often each ``(i, j, k)`` triple occurs in the list.

    Args:
        state_action_list: List whose entries are compared against
            ``(i, j, k)`` index tuples via ``list.count``.
        state_action_space: Triple ``(x, y, a)`` giving the extent of each
            index.

    Returns:
        Float NumPy array of shape ``state_action_space`` where entry
        ``[i, j, k]`` is the number of occurrences of ``(i, j, k)``.
    """
    tallies = np.zeros(state_action_space)
    n_rows, n_cols, n_actions = state_action_space
    # ndindex enumerates every (i, j, k) combination, last index fastest.
    for triple in np.ndindex(n_rows, n_cols, n_actions):
        tallies[triple] += state_action_list.count(triple)
    return tallies
def cumulative_reward(rewards):
    """Return the running totals of ``rewards``.

    Entry ``n`` of the result is the sum of ``rewards[0]`` through
    ``rewards[n]`` inclusive, so the result has one entry per reward and
    its last entry is the overall total.

    Args:
        rewards: Iterable of numeric rewards.

    Returns:
        List of running totals. For empty input the result is ``[0]``,
        kept for backward compatibility so callers that read the final
        element still get a total.
    """
    # accumulate builds each total from the previous one in a single pass
    # instead of re-summing a growing prefix for every position.
    totals = list(accumulate(rewards))
    return totals or [0]
def get_square_triangles(x, y, size):
    """Split an axis-aligned square into four triangles meeting at its centre.

    The square spans ``(x, y)`` to ``(x + size, y + size)``. Each triangle is
    a closed outline of four points: a corner, the centre, an adjacent
    corner, then the first corner again.

    Args:
        x: X coordinate of the square's ``(x, y)`` corner.
        y: Y coordinate of the square's ``(x, y)`` corner.
        size: Side length of the square.

    Returns:
        List of four point lists, one per side of the square, in the order:
        the ``y`` edge, the ``x + size`` edge, the ``y + size`` edge, and
        the ``x`` edge.
    """
    half = size / 2
    centre = (x + half, y + half)

    # Corners are named by their offset from (x, y): c<dx><dy>.
    c00 = (x, y)
    c10 = (x + size, y)
    c11 = (x + size, y + size)
    c01 = (x, y + size)

    return [
        [c10, centre, c00, c10],
        [c11, centre, c10, c11],
        [c01, centre, c11, c01],
        [c00, centre, c01, c00],
    ]