# testbed_run.r
source("strategies.r")
# Play one round: t.max pulls on a single bandit whose true arm means are
# given by vals, using decision strategy strat.dec and estimate-revision
# strategy strat.rev, starting from the initial estimates init.vals; any
# extra arguments (...) are passed through to strat.dec.
play1round <- function(vals, strat.dec,
                       strat.rev = action.value, t.max = 1000,
                       init.vals = rep(0, length(vals)), ...) {
  actions.times <- rep(0, length(vals))  # pull count per arm
  actions <- rep(NA, t.max)              # arm chosen at each step
  rewards <- rep(NA, t.max)              # reward received at each step
  # first pull, using the initial estimates
  action <- strat.dec(init.vals, ...)
  reward <- rnorm(1, mean = vals[action], sd = 1)
  actions.times[action] <- 1
  actions[1] <- action
  rewards[1] <- reward
  vals.expected <- strat.rev(init.vals, action, reward, 1)
  # remaining pulls, using the running estimates
  for (t in 2:t.max) {
    action <- strat.dec(vals.expected, ...)
    reward <- rnorm(1, mean = vals[action], sd = 1)
    actions.times[action] <- actions.times[action] + 1
    actions[t] <- action
    rewards[t] <- reward
    vals.expected <- strat.rev(vals.expected, action, reward,
                               actions.times[action])
  }
  optimal <- (vals[actions] == max(vals))
  optimal.percents <- cumsum(optimal) / seq_along(optimal)
  avgrewards <- cumsum(rewards) / seq_along(rewards)
  # first t.max entries: running fraction of optimal pulls;
  # last t.max entries: running average reward
  return(c(optimal.percents, avgrewards))
}
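
# Usage sketch for play1round. greedy.sketch is a hypothetical decision
# strategy with the signature play1round expects: it takes the current value
# estimates (plus any ... arguments) and returns an arm index. The real
# strategies are assumed to come from strategies.r.
greedy.sketch <- function(vals.expected) which.max(vals.expected)

if (interactive()) {
  # one round on a made-up 10-armed bandit with N(0,1) true means
  curve <- play1round(rnorm(10), greedy.sketch,
                      strat.rev = action.value.sketch, t.max = 100)
  # curve[1:100] is the running optimal-action rate,
  # curve[101:200] the running average reward
}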
# Run play1round on every problem in the testbed (one problem per row of
# tbed) and average the learning curves across problems.
testbed.run <- function(tbed, strat.dec,
                        strat.rev = action.value, t.max = 1000,
                        init.vals = rep(0, NCOL(tbed)), ...) {
  outcome <- apply(tbed, 1,
                   function(v) play1round(v, strat.dec,
                                          strat.rev, t.max,
                                          init.vals, ...))
  avg.optpercent <- colMeans(t(outcome)[, 1:t.max])
  avg.cumrewards <- colMeans(t(outcome)[, (t.max + 1):(2 * t.max)])
  return(data.frame(OptimalPercent = avg.optpercent,
                    CumulativeRewards = avg.cumrewards))
}
# Each row of testbed.csv is assumed to hold the true arm means for one
# bandit problem.
testbed <- read.csv("testbed.csv")
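
# End-to-end sketch: if testbed.csv is unavailable, a testbed of the same
# shape can be generated directly -- e.g. 200 ten-armed problems with N(0,1)
# true means (the sizes here are arbitrary assumptions, not the original
# setup):
if (interactive()) {
  tb <- matrix(rnorm(200 * 10), nrow = 200, ncol = 10)
  res <- testbed.run(tb, greedy.sketch,
                     strat.rev = action.value.sketch, t.max = 500)
  plot(res$OptimalPercent, type = "l",
       xlab = "pull", ylab = "fraction optimal action")
}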