Showcase Normal Q-Learning with NN.R
library(ReinforcementLearningwithR)
require(compiler)
strat <- c("strat.a") #"strat.a" to "strat.i" with get.antistrat or any other strategy (vector) without "self" implies self play.
antistrat <- "none" #or "none" if none of the strategies of above or several strategies are given.
file.name <- paste0("opt.run.",paste0(strat,collapse="."),".",Sys.Date(),".NN") #File where the results are saved
#Parameters of game. Currently supported: "BattleOfStrategiesThesis.Baseline" and "BattleOfStrategies2019"
game.setting <- "BattleOfStrategiesThesis.Baseline"
block.no <- 150 #Number of blocks to play. More is generally better, but runtime grows roughly linearly (once the memory is sufficiently full), while the final performance shows strongly diminishing returns depending on the complexity of the strategy.
eval.no <- 1000 #Number of matches played to evaluate the final performance of the model with the StratTourn package
T.max <- 60 #Number of periods of the game. Note: if this is changed, it is recommended to change algo.par$batch.size as well.
#If memory initialization through self-play is desired, it may be set here. 0 means no initialization.
memory.initialization <- 0
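# Illustrative alternative (the value is an assumption, not tuned): pre-fill the
# memory with 1000 self-play experiences before the first training block:
# memory.initialization <- 1000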
#Set the most important parameters of the neural network here.
## nodes.layer.1 sets the number of nodes in the first hidden layer. More nodes allow more complex strategies to be tackled, but risk overfitting and may require more training data and more epochs.
## nodes.layer.2 sets the number of nodes in the second hidden layer, with the same trade-offs as nodes.layer.1.
## batch.size.train is the internal mini-batch size of the neural network (see https://stats.stackexchange.com/questions/153531/what-is-batch-size-in-neural-network). Larger batches are more stable, but training is slower / needs more epochs.
## epochs is the number of times the complete training data is propagated through the network. More epochs describe the training data more accurately but increase the risk of overfitting. Very relevant for speed.
## give.up.precision controls a method used in the thesis of Martin Kies: after training for the given number of epochs, it is checked whether the new loss is better than the one of the previous block. If not, training is repeated up to give.up.precision times, so the actual number of epochs may vary between epochs and epochs*give.up.precision. As this is the showcase for a benchmark case, the default uses no give.up.precision.
func.approx.params <- list(nodes.layer.1=126, nodes.layer.2=64, batch.size.train=32, epochs=50, give.up.precision=0)
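# Illustrative smaller/faster configuration (values are assumptions, not from the thesis):
# func.approx.params <- list(nodes.layer.1=64, nodes.layer.2=32, batch.size.train=64, epochs=25, give.up.precision=0)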
#This defines the function which provides easy access to the package
generate.best.strat <- function(strat, antistrat="none", game.setting, func.approx.params, memory.initialization, block.no, eval.no, T.max, file.name){
restore.point("generate.best.strat")
game.object <- Get.Game.Object.PD(encoding.state="Harper",game.setting=game.setting,strats=strat, eval.strategy = "Model.strat.NN.Harper")
assign("game.object",game.object,envir=.GlobalEnv) #necessary for the tournament
#Define the non-changing parameters of the algorithm, e.g. which features and parameters are used.
algo.par <- Get.Def.Par.QLearningPersExpPath(setting="QLearning.Basic")
algo.par$gamma <- game.object$game.pars$delta #recommended, as the algorithm would otherwise optimise for a different setting than the game itself
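# If T.max was changed above, this is the place to adjust the batch size as
# recommended there (the scaling below is purely illustrative; the appropriate
# size is a judgment call):
# algo.par$batch.size <- 2 * T.max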
#Define the function approximator and its parameters
model.par <- Get.Def.Par.Neural.Network(setting="ThesisBasic")
model.par$hidden.nodes[1] <- func.approx.params$nodes.layer.1
model.par$hidden.nodes[2] <- func.approx.params$nodes.layer.2
model.par$batch.size.train <- func.approx.params$batch.size.train
model.par$epochs <- func.approx.params$epochs
model.par$give.up.precision <- func.approx.params$give.up.precision
if(func.approx.params$give.up.precision==0){
model.par$enforce.increasing.precision <- FALSE
} else {
model.par$enforce.increasing.precision <- TRUE
}
#Set up the model and the other variable aspects
evaluator <- Setup.QLearningPersExpPath(game.object, algo.par=algo.par, model.par=model.par)
if(memory.initialization==0){
memory.init <- "none"
start.w.training <- FALSE
} else {
memory.init <- "self.play"
start.w.training <- TRUE
}
algo.var <- Initialise.QLearningPersExpPath(game.object, algo.par, memory.init=memory.init, memory.param=list(no=memory.initialization), model.par=model.par)
#Execute the algorithm
res <- Train.QLearningPersExpPath(evaluator=evaluator, model.par=model.par, algo.par=algo.par, algo.var=algo.var, game.object=game.object, blocks=block.no, eval.only=FALSE, start.w.training=start.w.training, out.file=paste0(file.name,".tmp"))
#Save Memory & model
evaluator <- res$evaluator
algo.var <- res$algo.var
idio.name <- paste0("opt.run.NN.full.",paste0(strat,collapse="."))
file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), "before.StratTourn", sep=" ")
save(evaluator, algo.var, algo.par, game.object, model.par, file=file.name)
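# The saved state can later be reloaded for inspection (hypothetical file name
# following the pattern above):
# load("opt.run.NN.full.strat.a 01-Jan-2020 12.00 before.StratTourn")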
# Do the StratTourn evaluation
game = make.pd.game(uCC=game.object$game.pars$uCC, uCD=game.object$game.pars$uCD, uDC=game.object$game.pars$uDC, uDD=game.object$game.pars$uDD, err.D.prob=game.object$game.pars$err.D.prob, err.C.prob=game.object$game.pars$err.C.prob, delta=game.object$game.pars$delta)
#Prepare list of strategies for StratTourn
if(antistrat!="none"){
strat.tourn = nlist(Model.strat.NN.Harper,get(strat), get(antistrat))
names(strat.tourn)[2] <- strat
names(strat.tourn)[3] <- antistrat
} else {
strat.tourn = c(Model.strat.NN.Harper,lapply(strat,FUN=function(x){
if(x!="self"){
return(get(x))
} else {
return(NULL)
}
}))
names(strat.tourn) <- seq_along(strat.tourn)
strat.tourn[sapply(strat.tourn, is.null)] <- NULL
names(strat.tourn)[1] <- "Model.strat.NN.Harper"
names(strat.tourn)[2:length(names(strat.tourn))] <- strat[strat!="self"]
}
#Initialize tournament
tourn = init.tournament(game=game, strat=strat.tourn)
tourn = run.tournament(tourn=tourn, R = eval.no, T.max=T.max)
#Calculate a single relevant statistic. Against a single strategy this is the return against that strategy; in a tournament it is the tournament performance.
if(length(strat)==1){
r.limit <- get.matches.vs.matrix(tourn$dt)["Model.strat.NN.Harper",strat]
} else {
srfm <- strat.rank.from.matches(tourn$dt)
r.limit <- srfm[srfm$strat=="Model.strat.NN.Harper",mean]
}
file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), sep=" ")
#Save Memory & model
save(evaluator, algo.var, algo.par, game.object, model.par, r.limit, tourn, file=file.name)
#Show Tournament
show.tournament(tourn)
}
disable.restore.points(TRUE) #disable restore points for speed
enableJIT(3) #enable maximum JIT compilation of R functions
generate.best.strat(strat=strat, antistrat=antistrat, game.setting=game.setting, func.approx.params=func.approx.params, memory.initialization=memory.initialization, block.no=block.no, eval.no=eval.no, T.max=T.max, file.name=file.name)
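# Illustrative follow-up run (hypothetical choices): train against two fixed
# strategies at once plus self-play, with no single antistrategy:
# strat <- c("strat.a", "strat.b", "self")
# generate.best.strat(strat=strat, antistrat="none", game.setting=game.setting,
#   func.approx.params=func.approx.params, memory.initialization=memory.initialization,
#   block.no=block.no, eval.no=eval.no, T.max=T.max,
#   file.name=paste0("opt.run.", paste0(strat, collapse="."), ".", Sys.Date(), ".NN"))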