# We maintain the belief-goal-action view.
# The goal can either come directly from the environment,
# or be implicitly assumed by the decision rule.
# A strategy consists of (at least) two parts:
# the decision rule, which takes in a belief and outputs an action,
# and a belief revision rule, which takes in the current belief
# and the new information, and outputs a new belief.
# In this sense, the action-value methods below all share
# the same belief revision rule.
action.value <- function(oldvals, action, reward, nth){
  #Take as input the vector of old expected values, the current action
  #and reward, and the number of times this action has been chosen.
  #Incremental sample-average update: new estimate = old + (reward - old)/n
  oldvals[action] <- oldvals[action] + (reward - oldvals[action])/nth
  return(oldvals)
}
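#A minimal worked example of the belief revision rule above: applying the
#update twice for action 1 reproduces the sample average of its rewards.
#The variable name demo.vals is only illustrative.
demo.vals <- c(0, 0)                              #initial estimates for 2 actions
demo.vals <- action.value(demo.vals, 1, 1.0, 1)   #after reward 1.0 -> c(1.0, 0)
demo.vals <- action.value(demo.vals, 1, 0.0, 2)   #after reward 0.0 -> c(0.5, 0)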
#Greedy rule always chooses the (first) current optimal action
greedy <- function(vals){
  return( which.max(vals) )
}
#Epsilon greedy rule has probability epsilon of randomly choosing a
#non-optimal action
eps.greedy <- function(vals, eps){
  optimal <- which.max(vals)
  if (runif(1) >= eps){
    return(optimal)
  }
  #Index into the vector of non-optimal actions explicitly, so that sample()
  #does not fall back to sampling from 1:n when only one candidate is left
  others <- (1:length(vals))[-optimal]
  return( others[sample.int(length(others), 1)] )
}
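#A minimal sketch of how the two parts compose: action.value as the belief
#revision rule and eps.greedy as the decision rule, run on a toy k-armed
#Gaussian bandit. The function name run.bandit, the arm means, eps, and the
#step count are illustrative assumptions, not taken from the original file.
run.bandit <- function(true.means = c(0.2, 0.5, 0.8), steps = 1000, eps = 0.1){
  k <- length(true.means)
  vals <- rep(0, k)     #current belief: estimated value of each action
  counts <- rep(0, k)   #how many times each action has been chosen
  for (t in 1:steps){
    action <- eps.greedy(vals, eps)                 #decision rule
    reward <- rnorm(1, mean = true.means[action])   #feedback from the environment
    counts[action] <- counts[action] + 1
    vals <- action.value(vals, action, reward, counts[action])   #belief revision
  }
  return(list(values = vals, counts = counts))
}
#run.bandit()$values should end up close to the true means for arms that
#were pulled often enough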
#Softmax
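#A minimal sketch of a softmax (Boltzmann) decision rule: choose each action
#with probability proportional to exp(value / temperature). The function name
#and the temp parameter here are illustrative assumptions.
softmax <- function(vals, temp = 1){
  #Subtract the maximum before exponentiating for numerical stability
  prefs <- exp((vals - max(vals)) / temp)
  probs <- prefs / sum(prefs)
  return( sample(1:length(vals), 1, prob = probs) )
}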