hyperopt_python.py
# Use hyperopt to figure out the parameters of the python code to get the
# correct magnitudes in the plots
from __future__ import print_function  # Only needed for Python 2

from daw_mbf_1 import Agent
from calcStayProb import CalcStayProb
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
import numpy as np
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3 has no cPickle
import sys

from IPython.core import ultratb
# Drop into a verbose, colored post-mortem debugger on any uncaught exception
sys.excepthook = ultratb.FormattedTB(mode='Verbose', color_scheme='Linux', call_pdb=1)
# 'Ideal' values for the four bars
ideal = np.array([.774, .691, .697, .779])
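# (Presumably the target stay probabilities for the four conditions of the
# two-step task bar plot that this script is trying to reproduce.)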
# These parameters are chosen by hand and are not optimized
num_runs = 40
num_steps = 40000
max_evals = 1000
def objective(args):
    alpha = args['alpha']
    noise = args['noise']
    results = np.zeros((num_runs, 4))
    for i in range(num_runs):
        agent = Agent(alpha=alpha, noise=noise)
        temp_str = []
        # print("firstStageChoice secondStage secondStageChoice finalReward")
        firstStageChoice = None
        secondStage = None
        secondStageChoice = None
        finalReward = None
        for step in range(num_steps):  # Repeat (for each step of episode):
            if agent.oneStep() is None:
                print("oneStep broke")
                break
            if step % 2 == 0:  # in stage 1
                firstStageChoice = agent.getLastAction()
                secondStage = agent.getCurrBoardState()
            else:  # in stage 2
                secondStageChoice = agent.getLastAction()
                finalReward = agent.getCurrReward()
                # Log one completed trial per pair of steps
                temp_str.append('{0} {1} {2} {3}'.format(firstStageChoice, secondStage, secondStageChoice, finalReward))
        # Turn the logged trials into the four bar values for this run
        calculator = CalcStayProb()
        results[i, :] = np.array(calculator.doItAllString(temp_str, return_value=True))
    # Loss is the root-mean-square error between the average simulated bars and the 'ideal' bars
    avg = np.mean(results, axis=0)
    return {'loss': np.sqrt(np.mean((avg - ideal)**2)), 'status': STATUS_OK}
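
# A minimal smoke test (not part of the original script; parameter values are
# arbitrary) could call the objective directly before launching the search:
#
#     result = objective({'alpha': 0.5, 'noise': 0.1})
#     print(result['loss'], result['status'])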
# Load the Trials object from a previous run if possible, so optimization can resume
try:
    with open('hyperopt_data.p', 'rb') as f:
        trials = pickle.load(f)['trials']
    previous_evals = len(trials)
except Exception:
    # No usable previous data; start from scratch
    trials = Trials()
    previous_evals = 0

# TODO: try different distributions
space = {'alpha': hp.uniform('alpha', 0, 1),
         'noise': hp.uniform('noise', 0, 1)}
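# One possibility for the TODO above (a sketch, not used here): sample noise
# log-uniformly so that small values are explored more densely. hp.loguniform
# takes its bounds in log space.
#
#     space = {'alpha': hp.uniform('alpha', 0, 1),
#              'noise': hp.loguniform('noise', np.log(1e-3), np.log(1))}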

# Run fmin one evaluation at a time so that progress can be checkpointed after every step
for i in range(previous_evals, max_evals + previous_evals):
    print("Eval {0}/{1}".format(i + 1, max_evals + previous_evals))
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=i + 1)
    print(best)
    # Save the best parameters so far and the full Trials history
    with open('hyperopt_data.p', 'wb') as f:
        pickle.dump({'best': best, 'trials': trials}, f)
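
# A minimal sketch (assuming the pickle layout written above) of how the saved
# results could be inspected later in a separate session:
#
#     with open('hyperopt_data.p', 'rb') as f:
#         data = pickle.load(f)
#     print(data['best'])                  # best parameter values found
#     print(min(data['trials'].losses()))  # lowest RMS error across evaluations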