-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlearn.py
161 lines (140 loc) · 4.45 KB
/
learn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import chess
import random
import chess.pgn
from negamax import MoveSelector
from weightsHandler import WeightsHandler
import evaluator
import config
def main():
    """Replay recorded PGN games and learn evaluation weights by TD(lambda).

    Reads games from config.GAMES_FILE_NAME, replays each decisive game from
    both colors' perspectives, collects search scores and board features for
    every own move, and updates the piece-square weight tables persisted by
    WeightsHandler in weights.py.  Drawn games are skipped.
    """
    # Open PGN file with games database
    gamesFile = open(config.GAMES_FILE_NAME)
    # Number of games processed so far
    gamesCounter = 0
    # Move selection module
    moveSelector = MoveSelector(config.MAX_ITER_MTD, config.MAX_DEPTH, config.MAX_SCORE)
    # Weight-handling (persistence) module
    weightsHandler = WeightsHandler("weights.py")
    # Learning rate, decayed after every processed game
    learningRate = config.ALPHA_INIT
    # Loop over recorded games in file until counter reaches limit
    while gamesCounter < config.MAX_GAMES:
        # Get a game; read_game returns None at end of file.
        game = chess.pgn.read_game(gamesFile)
        # BUGFIX: check for None BEFORE touching the game.  The original
        # called game.variation(0) first, which raises AttributeError on
        # None — never caught by the KeyError handler below — so reaching
        # the end of the PGN file crashed instead of stopping cleanly.
        if game is None:
            break
        # Skip games whose main line cannot be followed
        try:
            game.variation(0)
        except KeyError:
            continue
        # Find the winner from the Result header; skip draws ("1/2-1/2")
        whitePnt = game.headers["Result"][0]
        if whitePnt == "1" and game.headers["Result"][1] != "/":
            winColor = chess.WHITE
        elif whitePnt == "0":
            winColor = chess.BLACK
        else:
            continue
        print("\nGame ", gamesCounter + 1)
        # Clear transposition table
        moveSelector.clearTransTable()
        # Play as both black (0) and white (1); note chess.WHITE == 1
        for color in range(2):
            # Use local copy of game
            state = game
            # Get board object from game
            board = state.board()
            # Scores from search, and the boards the search aimed to reach
            scores = []
            boards = [board.copy()]
            # Feature vectors for the init/final weight sets, one per own move
            featuresInit = []
            featuresFinal = []
            # Initialize turn counter
            turnCounter = 0
            if color:
                print("White")
            else:
                print("Black")
            # Loop through game, move by move
            while not state.is_end():
                # Get next board position
                state = state.variation(0)
                board = state.board()
                # If computer's turn to move
                if board.turn == color:
                    # Get score of board and position that computer aims to reach
                    _, score, finalBoard = moveSelector.selectMove(board)
                    # Store score, finalBoard and features of finalBoard
                    scores.append(score)
                    boards.append(finalBoard)
                    fI, fF = evaluator.findFeatures(finalBoard, color)
                    featuresInit.append(fI)
                    featuresFinal.append(fF)
                    turnCounter = turnCounter + 1
                    print("Turn ", turnCounter, '\r', end='')
            print('\n', end='')
            # Terminal score: +/-MAX_SCORE depending on whether this color won
            if winColor == color:
                scores.append(config.MAX_SCORE)
            else:
                scores.append(-config.MAX_SCORE)
            # Learn weights from this game's trajectory
            initPosWeights, finalPosWeights = weightsHandler.getWeights()
            initPosWeights, finalPosWeights = learn(
                initPosWeights,
                finalPosWeights,
                featuresInit,
                featuresFinal,
                scores,
                learningRate,
                config.LAMBDA,
                config.MAX_POSITION_SCORE
            )
            # Store weights
            weightsHandler.setWeights(initPosWeights, finalPosWeights)
            weightsHandler.writeWeightsToFile()
            # Decrease learning rate
            learningRate /= config.ALPHA_DEC_FACTOR
        # Done learning from one game, so increment game counter
        gamesCounter = gamesCounter + 1
    # Close file handlers when learning is complete
    weightsHandler.closeWeightsFile()
    gamesFile.close()
def learn(wRawInit, wRawFin, fInit, fFinal, J, alpha, lambdaDecay, clampVal):
    """Update piece-square weight tables with TD(lambda) learning.

    Args:
        wRawInit: 6x64 nested list of weights for the initial-position evaluator.
        wRawFin: 6x64 nested list of weights for the final-position evaluator.
        fInit: per-move feature vectors (length 384) for the init weight set.
        fFinal: per-move feature vectors (length 384) for the final weight set.
        J: position scores; one element longer than the feature lists because
           the terminal game result is appended by the caller.
        alpha: learning rate.
        lambdaDecay: TD(lambda) decay applied to future temporal differences.
        clampVal: updated weights are rounded and clamped to [-clampVal, clampVal].

    Returns:
        (wRawInit, wRawFin): new, clamped 6x64 weight tables.
    """
    # Unroll the 6x64 weight tables into flat length-384 vectors
    wInit = [wRawInit[j][i] for j in range(6) for i in range(64)]
    wFin = [wRawFin[j][i] for j in range(6) for i in range(64)]
    sizeW = len(wInit)
    sizeJ = len(J)
    # Accumulate the signed update for each weight.  The propagated temporal
    # difference satisfies the backward recurrence
    #     S[t] = (J[t+1] - J[t]) + lambdaDecay * S[t+1]
    # which computes the same sum as the original O(T^2) double loop in a
    # single O(T) backward pass, accumulating in place instead of rebuilding
    # two 384-element lists per time step.
    updateMagInit = [0.0] * sizeW
    updateMagFinal = [0.0] * sizeW
    propTempDiff = 0.0
    for t in range(sizeJ - 2, -1, -1):
        propTempDiff = (J[t + 1] - J[t]) + lambdaDecay * propTempDiff
        ft = fInit[t]
        ff = fFinal[t]
        for i in range(sizeW):
            updateMagInit[i] += propTempDiff * ft[i]
            updateMagFinal[i] += propTempDiff * ff[i]
    # Apply the gradient-style update
    for i in range(sizeW):
        wInit[i] += alpha * updateMagInit[i]
        wFin[i] += alpha * updateMagFinal[i]
    # Re-roll the flat vectors into 6x64 tables, rounding to int and clamping
    wRawInit = [[max(min(int(round(wInit[i + 64 * j])), clampVal), -clampVal)
                 for i in range(64)] for j in range(6)]
    wRawFin = [[max(min(int(round(wFin[i + 64 * j])), clampVal), -clampVal)
                for i in range(64)] for j in range(6)]
    # Return final weights
    return (wRawInit, wRawFin)
# Script entry point: run the learning loop when executed directly
if __name__ == "__main__":
    main()