from nannon import *
from scratch_nn import *
from hillClimb_NN import *
import pickle
import random
from tqdm import tqdm
# Class for the value player; it loads the table once at construction to
# avoid repeated disk I/O.
class valuePlayerC:
    def __init__(self):
        # use Python's serializer to unpack the precomputed mediocre_table
        self.table = pickle.load(open('nannon/mediocre_table.p', 'rb'))
    def valuePlayer(self, pos, roll):
        possiblePOS = legal_moves(pos, roll)
        best_move = pos
        bestScore = -1
        if possiblePOS[0] != -1:
            # greedily take the move whose resulting position scores highest in the table
            for possPOS in possiblePOS:
                tempPosition = make_move(pos, possPOS, roll)
                posVal = self.table[tempPosition]
                if bestScore < posVal:
                    bestScore = posVal
                    best_move = tempPosition
            return best_move
        else:
            return pos
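
# A minimal usage sketch in the style of the test helpers below; play_tourn and
# rand_play come from nannon's star import. The call is left commented out,
# like the other example calls in this file, so importing stays side-effect free.
def testValuePlayer():
    vp = valuePlayerC()
    print(play_tourn(vp.valuePlayer, rand_play))
# testValuePlayer()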

# Player that uses a provided neural net.
# To get the play function, simply do something like
#   netPlayerC(TD_NET().train_Net()).netPlayer
# which yields a function that plays using a TD-trained net.
class netPlayerC:
    def __init__(self, net):
        self.nn = net
    def netPlayer(self, pos, roll):
        possiblePOS = legal_moves(pos, roll)
        best_move = pos
        bestScore = -1
        if possiblePOS[0] != -1:
            for possPOS in possiblePOS:
                tempPosition = make_move(pos, possPOS, roll)
                posVal = self.nn.query(tempPosition)[0][0]
                if bestScore < posVal:
                    bestScore = posVal
                    best_move = tempPosition
        return best_move

# Moves based on which move presents the opponent with the least opportunity
# to improve: a one-ply minimax tree whose leaf scores are calculated with a
# NN. Roughly pieces^2 * DICE positions are evaluated per candidate move.
class minMaxNNPlayerC:
    def __init__(self, net):
        self.nn = net
    def kbPlayer(self, pos, roll):
        possiblePOS = legal_moves(pos, roll)
        bestMove = -1
        bestMoveScore = -10
        for move in possiblePOS:
            tempScore = 0
            tempBoard = swap_players(make_move(pos, move, roll))
            # for each opponent roll
            for x in range(1, 7):  # assume a six-sided die; easy to generalize later with globals
                nMoves = legal_moves(tempBoard, x)
                moveTscore = 0
                # for each possible reply, score our position minus the opponent's
                for nMove in nMoves:
                    tTempBoard = swap_players(make_move(tempBoard, nMove, x))
                    moveTscore += self.nn.query(tTempBoard)[0][0] - self.nn.query(swap_players(tTempBoard))[0][0]
                # average over the replies to this roll, then accumulate across rolls
                tempScore += moveTscore / len(nMoves)
            # average over the six equally likely rolls
            tempScore = tempScore / 6
            if tempScore > bestMoveScore:
                bestMoveScore = tempScore
                bestMove = move
        return make_move(pos, bestMove, roll)
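
# Usage sketch: the one-ply player accepts any object exposing .query(pos);
# here it borrows a TD-trained net from train_Net below (assumption: a handful
# of training tournaments is enough to make the net's values non-trivial).
def testMinMax():
    net = TD_NET().train_Net(5)
    print(play_tourn(minMaxNNPlayerC(net).kbPlayer, rand_play))
# testMinMax()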

# When the player wins, it goes back and tells the network that all the moves
# it made were good. To get a TD-trained net, just do TD_NET().train_Net(),
# which returns the neural net. It is TD-Gammon, more or less.
class TD_NET:
    def __init__(self):
        self.nn = ScratchNetwork(3, 8, 1)
        self.trainNN = None
        self.moves = []
        self.start = False
        self.trainCount = 0
    def trainOnMoves(self, win):
        # implement a decay rate: later moves get targets closer to +-10
        dec = .5  # decay rate
        # reinforce positions from a won game, push down those from a lost one
        trainVal = 10 if win else -10
        for x in range(len(self.moves)):  # trend towards winning positions
            finalTrainval = dec**(len(self.moves) - (x + 1)) * trainVal
            self.nn.train(self.moves[x], finalTrainval)
        self.moves = []
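    # Worked example of the schedule above: for a won four-move game with
    # dec = .5, the targets are .5**3 * 10 = 1.25, then 2.5, 5, and 10 for the
    # final move, so positions closest to the win are reinforced most strongly.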
    # TODO: fix so the training guarantees that it actually knows when it wins or loses
    def TD_NetTrainPlay(self, pos, roll):
        if not self.start:
            self.start = True
            self.startPos = pos
        else:
            # back at the starting position: the last game ended in a loss
            if self.startPos == pos:
                self.trainOnMoves(False)
        possiblePOS = legal_moves(pos, roll)
        best_move = pos
        bestScore = -1
        if possiblePOS[0] != -1:
            for possPOS in possiblePOS:
                tempPosition = make_move(pos, possPOS, roll)
                posVal = self.nn.query(tempPosition)[0][0]
                if bestScore < posVal:
                    bestScore = posVal
                    best_move = tempPosition
                # if the game would end in our favor
                if who_won(tempPosition) == 1:
                    self.trainOnMoves(True)
                    self.trainCount += 1
        self.moves.append(pos)
        return best_move
    def train_Net(self, n=1):
        if not self.trainNN:
            self.trainNN = TD_NET()  # sets up a training partner
        for x in range(n):
            # self-play: train against a second TD net
            play_tourn(self.TD_NetTrainPlay, self.trainNN.TD_NetTrainPlay)
        return self.nn

def testTDNET():
    a = TD_NET()
    for x in range(10):
        print(play_tourn(netPlayerC(a.nn).netPlayer, rand_play))
        a.train_Net(2)

## NOTE: the weights seem to be weird and random; however, the values most
# likely stand in comparison to each other rather than to the values from the
# table, which still makes them hard to gauge.
# The argument sets how many iterations of training are shown; each iteration
# trains in a 1000-round tourney 10 times, then prints the net's value for
# every position next to the table's value rescaled from [-1, 1] to [0, 1].
def testTD(n=1):
    net = TD_NET()
    table = pickle.load(open('nannon/mediocre_table.p', 'rb'))
    for x in range(n):
        net.train_Net(10)
        for pos in table.keys():
            print(pos)
            print(net.nn.query(pos)[0][0], ((table[pos] - (-1)) / 2))
# testTD(3)

# Matchbox (MENACE-style) learner
# NOTE: steps
# 1: use the explore function to build a map of reachable states
# 2: fill each (state, roll) box with five beads for each of the three candidate moves
# 3: keep a hash map of each position and the move chosen for each roll
# 4: at a win or loss, walk back through the hash of chosen moves and either add or remove beads
class MenacePlayerC:
    def __init__(self):
        moveList = explore()
        checkerList = []
        for x in range(3):
            for y in range(5):
                checkerList.append(x)
        self.boxCollection = dict()
        for state in moveList:
            self.boxCollection[state] = dict()
            # account for dice rolls: one box per (state, roll) pair
            for roll in range(1, 7):
                self.boxCollection[state][roll] = checkerList[:]  # copy
        self.moveCollection = dict()
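        # Each freshly built box is therefore [0,0,0,0,0, 1,1,1,1,1, 2,2,2,2,2]:
        # five beads per move, so the first draw is uniform over the three moves.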
    def MenacePlayer(self, pos, roll):
        checkerList = self.boxCollection[pos][roll][:]
        moveFound = False
        legalMoves = legal_moves(pos, roll)
        # if no move is possible, pass
        if legalMoves[0] == -1:
            return make_move(pos, -1, roll)
        possibleMove = -1
        # draw beads from the box until a legal move comes up
        while not moveFound and len(checkerList) > 0:
            possibleMove = random.choice(checkerList)
            if possibleMove in legalMoves:
                moveFound = True
            else:
                checkerList.remove(possibleMove)
        # fallback: the box ran out of legal beads, so pick a legal move at random
        if not moveFound:
            possibleMove = random.choice(legalMoves)
        # record the chosen move so training can reward or punish it later
        moveList = []
        if pos not in self.moveCollection:
            self.moveCollection[pos] = dict()
        if roll in self.moveCollection[pos]:
            moveList = self.moveCollection[pos][roll]
        moveList.append(possibleMove)
        self.moveCollection[pos][roll] = moveList
        return make_move(pos, possibleMove, roll)
    # public-facing play method: identical draw logic, but it does not record
    # moves, so it can be used for evaluation without polluting training state
    def publicMenacePlayer(self, pos, roll):
        checkerList = self.boxCollection[pos][roll][:]
        moveFound = False
        legalMoves = legal_moves(pos, roll)
        # if no move is possible, pass
        if legalMoves[0] == -1:
            return make_move(pos, -1, roll)
        possibleMove = -1
        # draw beads from the box until a legal move comes up
        while not moveFound and len(checkerList) > 0:
            possibleMove = random.choice(checkerList)
            if possibleMove in legalMoves:
                moveFound = True
            else:
                checkerList.remove(possibleMove)
        # fallback: the box ran out of legal beads, so pick a legal move at random
        if not moveFound:
            possibleMove = random.choice(legalMoves)
        return make_move(pos, possibleMove, roll)
    def trainBox(self, games=10000, opp=valuePlayerC().valuePlayer):
        for x in range(games):
            winner = play_game(self.MenacePlayer, opp)
            if winner == "first":
                self.trainWin()
            else:
                self.trainLose()
    def trainWin(self):
        # get the positions from the game just played
        for pos in self.moveCollection:
            for roll in self.moveCollection[pos]:
                # get the matching box for this position and roll
                box = self.boxCollection[pos][roll]
                # add two more beads for each choice made
                for x in range(2):
                    # for each choice, add the corresponding bead to the box
                    for move in self.moveCollection[pos][roll]:
                        box.append(move)
                # update the box
                box.sort()
                self.boxCollection[pos][roll] = box
        # clear the collection of moves
        self.moveCollection = dict()
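    # Worked example: if move 1 was drawn at some (pos, roll) in a won game,
    # trainWin appends two extra 1-beads to that box, raising the odds that
    # move 1 is drawn there again; after a loss, trainLose below instead
    # removes one matching bead per recorded choice.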
    def trainLose(self):
        for pos in self.moveCollection:
            for roll in self.moveCollection[pos]:
                # get the matching box for this position and roll
                box = self.boxCollection[pos][roll]
                # for each choice made, remove the corresponding bead from the box
                for move in self.moveCollection[pos][roll]:
                    if move in box:
                        box.remove(move)
                # update the box
                self.boxCollection[pos][roll] = box
        # clear the collection of moves
        self.moveCollection = dict()

# Example of how the matchbox learner works. It is not viable at low
# repetition counts: about the same as random. Most likely bad since it did
# not (at first) account for dice rolls, just positions; a better player could
# possibly be made by nesting.
# UPDATE: after nesting the dicts it did not get better (when training for
# 100,000 games). Interestingly, it does seem to teach itself which moves are
# impossible in a given situation.
def exampleOfBoxNN():
    d = MenacePlayerC()
    for x in range(10):
        d.trainBox(10000)
        print(play_tourn(d.publicMenacePlayer, rand_play))
        # print(play_tourn(d.publicMenacePlayer, rand_play, 1000))
    with open('boxes.txt', 'w') as out:
        for x in d.boxCollection:
            out.write(str(x))
            out.write("\n")
            for y in d.boxCollection[x]:
                out.write(str(y))
                out.write("\n")
                out.write(str(d.boxCollection[x][y]))
                out.write("\n")
# exampleOfBoxNN()

# Hill climber
# NOTE: steps
# 1: set up a starting net
# 2: set up a mutated copy of it
# 3: play them head to head and keep the winner
class hillClimbC:
    def __init__(self):
        self.nn = hillClimb(3, 5, 1)
        print(self.nn.weights_ho)
    def train(self, nGens=1000):
        climbHist = []
        for x in tqdm(range(nGens)):
            mutatedNet = self.nn.getMutation()
            # the mutant replaces the incumbent only if it wins the head-to-head tourney
            mutWin = play_tourn(netPlayerC(mutatedNet).netPlayer, netPlayerC(self.nn).netPlayer, 500, 100)
            if mutWin > .5:
                self.nn = mutatedNet
                print("CHANGE", mutWin)
            # track performance of the current net against the random player
            climbHist.append(play_tourn(netPlayerC(self.nn).netPlayer, rand_play, 500, 100))
        return climbHist

def testClimb():
    hc = hillClimbC()
    climbH = hc.train(30)
    print(climbH)

# Older scratch experiments:
# def basicNN(pos, roll):
#     q = nn.query(target)
#     print(q[0][0])
#     print(scale(mediocre_table[target]))
# Target = ((2,7,7),(1,2,3))
# print(nn.query(Target)[0][0])
# print_board(Target)
# print(scale(mediocre_table[Target]))
# print(play_tourn(netPlayerC().netPlayer, rand_play))
# a = learningPlayerC()
# b = learningPlayerC()
# winP = []
# g = play_tourn(a.learningPlayer, valuePlayer, 100)
# winP.append(g)
# print(g)
# for x in range(20):
#     play_tourn(a.learningPlayer, b.learningPlayer, 10)
#     print("TOURN OVER")
#     g = play_tourn(a.learningPlayer, rand_play, 1000)
#     winP.append(g)
#     print(g)
# with open('output.txt', 'w') as out:
#     for x in winP:
#         out.write(str(x))
#         out.write("\n")