-
Notifications
You must be signed in to change notification settings - Fork 3
/
experiment.py
79 lines (62 loc) · 2.08 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 19 18:50:24 2017
@author: hulsed
"""
import ibfmOpt as io
import numpy as np
from matplotlib import pyplot as plt
import ibfm
# Experiment configuration: dimensions of the controller policy space.
controllers=4
conditions=3
modes=3
iterations=500  # policy evaluations per learning run
runs=1          # independent learning runs (rows of rewardhist)
# Initial policy and Q-table for Q-learning over controller modes.
FullPolicy=io.initFullPolicy(controllers,conditions)
QTab=io.initQTab(controllers, conditions, modes)
# rewardhist[run, iteration] records the utility obtained at each step.
rewardhist=np.ones([runs,iterations])
# Build the model variants and the baseline IBFM experiment to evaluate
# policies against ('monoprop' is the model name loaded by ibfm).
io.createVariants()
initexperiment=ibfm.Experiment('monoprop')
actionkey=io.initActions()
# Main learning loop: each run starts from a fresh Q-table and performs
# `iterations` policy evaluations, logging the utility of each design.
for run in range(runs):
    QTab = io.initQTab(controllers, conditions, modes)
    for it in range(iterations):
        # Select a (possibly exploratory) policy from the current Q-table.
        FullPolicy = io.selectPolicy(QTab, FullPolicy)
        actions, instates, utilityscores, designcost = io.evaluate(FullPolicy, initexperiment)
        utility = sum(utilityscores) - designcost
        rewardhist[run, it] = utility
        # Update the Q-table once per evaluated (action, instate) pair.
        # Per the original author's note, per-pair individual rewards did
        # not work, so the summed utility is used as the reward signal.
        for k in range(len(utilityscores)):
            # BUGFIX: the original assigned the result to `Qtab` (typo),
            # silently discarding every learning update; assign to `QTab`.
            QTab = io.Qlearn(QTab, actions[k], instates[k], sum(utilityscores))
        # QTab=io.avlearnnotracking(QTab, FullPolicy,utility)
        print(utility)
# Per-iteration summary statistics across runs (axis 0 = runs).
# Vectorized numpy reductions replace the original per-column Python loop;
# results are identical but computed in one pass each.
avereward = rewardhist.mean(axis=0)   # mean utility at each iteration
stdreward = rewardhist.std(axis=0)    # spread across runs
maxreward = rewardhist.max(axis=0)
minreward = rewardhist.min(axis=0)
# Best-so-far utility: running maximum of the per-iteration best,
# i.e. the best design found by the learner up to each iteration.
cumulativemax = np.maximum.accumulate(maxreward)
x = range(iterations)
# Figure 1: best design found so far vs. iteration.
plt.figure()
plt.plot(cumulativemax)
plt.title('Best Design Found by Learner Over Time')
plt.xlabel('Function Evaluations')
plt.ylabel('Utility Value')
# Figure 2: convergence — mean reward with std-dev error bars across runs.
# BUGFIX: the original drew the errorbar plot onto the same axes as the
# cumulative-max plot, so its title/labels overwrote figure 1; each plot
# now gets its own figure as the two titles clearly intend.
plt.figure()
plt.errorbar(x, avereward, stdreward, linestyle='None', marker='+')
plt.title('Convergence of Learner Over Time')
plt.xlabel('Iterations')
plt.ylabel('Score of End-States')
plt.grid()