Skip to content

Commit

Permalink
Merge pull request #18 from eboatwright/dev
Browse files Browse the repository at this point in the history
v3.1 (Patch 3)
  • Loading branch information
eboatwright authored Feb 23, 2024
2 parents 8d132f2 + 8bf79b9 commit 28cca03
Show file tree
Hide file tree
Showing 17 changed files with 819 additions and 182 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

sharpener
lichess_db_eval
nnue_trainer/__pycache__
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
## Features
#### Parameters
- fen=\<FEN STRING>: Sets up the board by a fen string (Doesn't work for UCI games) (default=STARTING_FEN)
- debug=\<BOOLEAN>: Toggle debug output that gets outputted per ply (default=true)
- debug_output=\<BOOLEAN>: Toggle debug output that gets outputted per ply (default=true)
- opening_book=\<BOOLEAN>: Toggle built-in opening book (default=false)
- time_management=\<BOOLEAN>: Toggle time management, if false the bot will use all the remaining time (default=true)
- hash_size=\<INTEGER>: Sets the hash size in Megabytes, there's also a UCI option for this under the name "Hash" (default=256)
Expand Down
9 changes: 1 addition & 8 deletions To-do list.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
thoughts on NNUE:
I've wanted to learn how to write a neural net for a long time, so I want to implement NNUE eventually.
But what I'm not going to do is just find a SF NNUE library and stick it in there because that's lame

Update:
I've learned how to write neural networks, and trained one to evaluate Tic-Tac-Toe positions!
So for either v3.2 or v3.3 I'm gonna work on writing my own version of NNUE from scratch
NNUE training is currently underway!

figure out some sort of multithreading:
to implement pondering I think I'll have to add multithreading
Expand All @@ -28,7 +22,6 @@ calculate my own magic numbers; currently "borrowing" Sebastian Lague's ^^
check out pin detection to speed up check detection
try writing a struct that sorts moves incrementally
I tried this a couple times, but haven't got it faster than my current solution
re-implement PV table with a different approach; I don't like the 2d array

History reductions / pruning
https://www.chessprogramming.org/Internal_Iterative_Deepening
Expand Down
Binary file added nnue_trainer/Perfect2021.bin
Binary file not shown.
15 changes: 15 additions & 0 deletions nnue_trainer/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Hyperparameters for NNUE self-play data generation and training.

# -- Training loop --
GAMES_PER_MATCH = 1_500   # self-play games generated per training cycle
EPOCHS_PER_TRAIN = 2      # passes over the collected data per cycle — still being tuned (?)
MINIBATCH_SIZE = 10_000   # positions per back-prop call
LEARNING_RATE = 0.0009

# -- Self-play settings --
DEPTH_PER_MOVE = 10           # engine search depth per move
PERC_CHANCE_FOR_RANDOM_MOVE = 2   # approx. % chance a move is picked at random (adds variety)
CONCURRENT_GAMES = 4          # games played in parallel
MAX_MOVES = 120               # hard cap on game length, in full moves

# -- Network architecture --
INPUT_NODES = 768             # presumably 12 piece planes x 64 squares — confirm against the encoder
HIDDEN_NODES = 64 # 256?  larger hidden layer under consideration
OUTPUT_NODES = 1

BUCKETS = 1 # 8  — bucket count under consideration
130 changes: 130 additions & 0 deletions nnue_trainer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# The developer of Mimir (the Weiawaga engine's NNUE trainer) was very helpful in making this!
# https://github.com/Heiaha/Mimir/

import random

import chess
import chess.engine
import chess.polyglot
import asyncio

import config
from neural_network import NeuralNetwork


class TrainingResults:
    """Running totals accumulated across training cycles."""

    def __init__(self):
        # games: total self-play games completed; positions: total positions trained on.
        self.games = self.positions = 0


class DataPoint:
    """One training example: a position plus the final result of its game."""

    def __init__(self, fen, outcome):
        self.fen = fen          # position as a FEN string
        self.outcome = outcome  # 1.0 white win, -1.0 black win, 0.0 draw


# Shared module-level state: play_game() appends into data_points, and the
# training loop under __main__ reads it and rebinds it between cycles.
training_results = TrainingResults()
nn = NeuralNetwork()
data_points = []


async def play_game():
    """Play one self-play game against the Maxwell engine and record training data.

    Starts from a randomized opening-book prefix, then alternates engine moves
    (occasionally replaced by a random legal move for variety). Every position
    after the book prefix is appended to the global data_points list as a
    DataPoint tagged with the final game outcome (1.0 / -1.0 / 0.0).
    """
    transport, maxwell_engine = await chess.engine.popen_uci(["./../target/release/maxwell", "debug_output=false"])
    board = chess.Board()

    try:
        # Randomized book prefix so games don't all start identically.
        with chess.polyglot.open_reader("Perfect2021.bin") as reader:
            number_of_book_moves = random.randint(1, 10)

            for i in range(number_of_book_moves):
                board.push(reader.choice(board).move)

        fen_strings = []

        while not board.is_game_over(claim_draw=True):
            if random.randint(0, 100) < config.PERC_CHANCE_FOR_RANDOM_MOVE:
                board.push(random.choice(list(board.legal_moves)))
            else:
                result = await maxwell_engine.play(board, chess.engine.Limit(depth=config.DEPTH_PER_MOVE))
                board.push(result.move)

            fen_strings.append(board.fen())

            if board.fullmove_number >= config.MAX_MOVES:
                break

        game_outcome = 0.0

        # board.outcome() (without claim_draw) returns None for claimable draws
        # (threefold repetition / 50-move rule); falling through leaves the
        # outcome at 0.0, which is exactly the draw label we want.
        if outcome := board.outcome():
            if outcome.winner == chess.WHITE:
                game_outcome = 1.0
            elif outcome.winner == chess.BLACK:
                game_outcome = -1.0

        for fen in fen_strings:
            data_points.append(DataPoint(fen, game_outcome))
    finally:
        # Always shut the engine down, even if the game loop raises, so we
        # don't leak engine subprocesses across the training run.
        await maxwell_engine.quit()


async def play_games():
    """Run config.GAMES_PER_MATCH self-play games with bounded concurrency.

    Keeps up to config.CONCURRENT_GAMES games in flight at once, topping the
    pool back up as games finish until the full quota has been scheduled.
    Game results are accumulated by play_game() into the global data_points.
    """
    games_completed = 0

    # Seed the pool with the first batch of concurrent games.
    pending = {asyncio.create_task(play_game()) for _ in range(config.CONCURRENT_GAMES)}

    while pending:
        print(f"Playing self-play games... {games_completed}/{config.GAMES_PER_MATCH}", end="\r", flush=True)

        completed, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)

        # Only the count matters here; the tasks' work is all side effects.
        for _ in completed:
            games_completed += 1

            # Refill the pool until the whole match quota has been scheduled.
            if games_completed + len(pending) < config.GAMES_PER_MATCH:
                pending.add(asyncio.create_task(play_game()))


if __name__ == "__main__":
    print("### MAXWELL NNUE TRAINER ###\n")

    training_cycle = 0
    # nn.save_weights() # Save the initial randomized weights so that the program has the same weights as the trainer

    # Each cycle: generate a fresh batch of self-play data, then train on it.
    # Runs forever; weights are saved at the end of every cycle.
    while True:
        # Rebind the shared buffer that play_game() appends into.
        data_points = []

        print(f"Training cycle {training_cycle + 1}:")

        asyncio.run(play_games())

        training_results.games += config.GAMES_PER_MATCH
        training_results.positions += len(data_points)

        print("\nSelf-play done!\n")
        print("Training network...")

        for epoch in range(config.EPOCHS_PER_TRAIN):
            print(f"Epoch {epoch + 1}...")

            # Shuffle so minibatches differ between epochs.
            random.shuffle(data_points)

            data_point_index = 0

            # Walk the data in MINIBATCH_SIZE slices; the last slice may be short.
            while data_point_index < len(data_points):
                next_index = min(data_point_index + config.MINIBATCH_SIZE, len(data_points))

                nn.back_prop(data_point_index, next_index, data_points[data_point_index:next_index])

                data_point_index = next_index

        print("Done training!")
        # print("Calculating total error...")
        # print(f"Total error on data set: {nn.get_total_error(data_points)}")
        # print("Done!\n")

        print(f"Total games played: {training_results.games}")
        print(f"Total positions trained on: {training_results.positions}\n")

        training_cycle += 1
        nn.save_weights()
133 changes: 133 additions & 0 deletions nnue_trainer/matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import random

class Matrix:
    """A minimal dense 2-D matrix of floats backed by a list of row lists.

    Most operations take the matrix as an explicit first argument (free-function
    style) and return a brand-new Matrix; `fill_zeros` and `map` mutate in place.
    """

    def __init__(self, rows, cols):
        """Create a rows x cols matrix with every element set to 0.0."""
        self.rows = rows
        self.cols = cols
        self.data = [[0.0] * cols for _ in range(rows)]

    def from_2d_list(input_2d_list):
        """Wrap an existing 2-D list as a Matrix (the list is aliased, not copied)."""
        wrapped = Matrix(len(input_2d_list), len(input_2d_list[0]))
        wrapped.data = input_2d_list
        return wrapped

    def fill_zeros(self):
        """Reset every element of this matrix to 0.0 in place."""
        for r in range(self.rows):
            for c in range(self.cols):
                self.data[r][c] = 0.0

    def flatten(self):
        """Return the elements as a single row-major list."""
        return [self.data[r][c] for r in range(self.rows) for c in range(self.cols)]

    def transpose(a):
        """Return a new matrix that is the transpose of `a`."""
        flipped = Matrix(a.cols, a.rows)
        flipped.data = [[a.data[r][c] for r in range(a.rows)] for c in range(a.cols)]
        return flipped

    def random(rows, cols):
        """Return a rows x cols matrix of uniform random values in [-0.8, 0.8]."""
        randomized = Matrix(rows, cols)
        randomized.data = [[random.uniform(-0.8, 0.8) for _ in range(cols)] for _ in range(rows)]
        return randomized

    def add(a, b):
        """Element-wise sum a + b as a new matrix."""
        summed = Matrix(a.rows, a.cols)
        summed.data = [[a.data[r][c] + b.data[r][c] for c in range(a.cols)] for r in range(a.rows)]
        return summed

    def subtract(a, b):
        """Element-wise difference a - b as a new matrix."""
        difference = Matrix(a.rows, a.cols)
        difference.data = [[a.data[r][c] - b.data[r][c] for c in range(a.cols)] for r in range(a.rows)]
        return difference

    def multiply(a, b):
        """Element-wise (Hadamard) product a * b as a new matrix."""
        product = Matrix(a.rows, a.cols)
        product.data = [[a.data[r][c] * b.data[r][c] for c in range(a.cols)] for r in range(a.rows)]
        return product

    def divide(a, b):
        """Element-wise quotient a / b as a new matrix."""
        quotient = Matrix(a.rows, a.cols)
        quotient.data = [[a.data[r][c] / b.data[r][c] for c in range(a.cols)] for r in range(a.rows)]
        return quotient

    def divide_by_num(mat, num):
        """Divide every element of `mat` by the scalar `num`; returns a new matrix."""
        scaled = Matrix(mat.rows, mat.cols)
        scaled.data = [[mat.data[r][c] / num for c in range(mat.cols)] for r in range(mat.rows)]
        return scaled

    def dot(a, b):
        """Matrix product of a and b (a.cols must equal b.rows); returns a new matrix."""
        product = Matrix(a.rows, b.cols)

        for r in range(product.rows):
            for c in range(product.cols):
                product.data[r][c] = sum(a.data[r][k] * b.data[k][c] for k in range(a.cols))

        return product

    def scale(m, s):
        """Multiply every element of `m` by the scalar `s`; returns a new matrix."""
        scaled = Matrix(m.rows, m.cols)
        scaled.data = [[m.data[r][c] * s for c in range(m.cols)] for r in range(m.rows)]
        return scaled

    def pow(m, e):
        """Raise every element of `m` to the power `e`; returns a new matrix."""
        raised = Matrix(m.rows, m.cols)
        raised.data = [[m.data[r][c] ** e for c in range(m.cols)] for r in range(m.rows)]
        return raised

    def map(m, fn):
        """Apply `fn` to every element of `m` IN PLACE; returns `m` for chaining."""
        for r in range(m.rows):
            for c in range(m.cols):
                m.data[r][c] = fn(m.data[r][c])
        return m
Loading

0 comments on commit 28cca03

Please sign in to comment.