
Restyled by yapf
restyled-commits committed Feb 24, 2020
1 parent 41a306c commit e78be0a
Showing 3 changed files with 87 additions and 86 deletions.
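Note: the exact yapf style Restyled applied in this commit isn't recorded here, so the snippet below is only a rough sketch of how a comparable reformat could be reproduced locally with yapf's Python API. It assumes yapf is installed (pip install yapf) and uses default PEP 8 settings; the file path and style string are placeholders for illustration, not taken from this repository.

# Hypothetical sketch, not part of the commit: reformat one file with yapf.
from yapf.yapflib.yapf_api import FormatCode

with open("network.py") as f:
    source = f.read()

# In yapf releases from around the time of this commit (~0.29),
# FormatCode returns the reformatted source and a "changed" flag.
formatted, changed = FormatCode(source, style_config="pep8")

if changed:
    with open("network.py", "w") as f:
        f.write(formatted)

The same kind of reformat is normally produced from the command line with yapf --in-place on each file.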
5 changes: 3 additions & 2 deletions consts.py
@@ -15,7 +15,8 @@
TILED_COLUMNS = np.arange(TOTAL_DISKS) % WIDTH

ROW_EDGE_DISTANCE = np.min([TILED_ROWS, np.flip(TILED_ROWS, axis=0)], axis=0)
COLUMN_EDGE_DISTANCE = np.min([TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
COLUMN_EDGE_DISTANCE = np.min(
[TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
ODDS = TILED_ROWS % 2

FOURS = []
@@ -69,7 +70,7 @@
for colour in range(COLOURS):
for row in range(HEIGHT):
disks_in_column = row ^ (row + 1)
yellow_disks = 2 ** (row + 3) if colour == YELLOW else 0
yellow_disks = 2**(row + 3) if colour == YELLOW else 0
row_hash = disks_in_column | yellow_disks
for column in range(WIDTH):
row_column_hash = row_hash << (9 * column)
72 changes: 38 additions & 34 deletions network.py
@@ -16,30 +16,31 @@ def __init__(self, scope, use_symmetry):
[1, 2, HEIGHT, WIDTH],
)

self.disks = tf.placeholder(
tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="disks"
)
self.disks = tf.placeholder(tf.float32,
shape=[None, 2, HEIGHT, WIDTH],
name="disks")

self.empty = tf.placeholder(
tf.float32, shape=[None, HEIGHT, WIDTH], name="empty"
)
self.empty = tf.placeholder(tf.float32,
shape=[None, HEIGHT, WIDTH],
name="empty")
empty = tf.expand_dims(self.empty, axis=1)

self.legal_moves = tf.placeholder(
tf.float32, shape=[None, HEIGHT, WIDTH], name="legal_moves"
)
self.legal_moves = tf.placeholder(tf.float32,
shape=[None, HEIGHT, WIDTH],
name="legal_moves")
legal_moves = tf.expand_dims(self.legal_moves, axis=1)

self.threats = tf.placeholder(
tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="threats"
)
self.threats = tf.placeholder(tf.float32,
shape=[None, 2, HEIGHT, WIDTH],
name="threats")

constant_features = np.array(
[TILED_ROWS, ODDS, ROW_EDGE_DISTANCE, COLUMN_EDGE_DISTANCE],
dtype=np.float32,
).reshape([1, 4, HEIGHT, WIDTH])
batch_size = tf.shape(self.turn)[0]
tiled_constant_features = tf.tile(constant_features, [batch_size, 1, 1, 1])
tiled_constant_features = tf.tile(constant_features,
[batch_size, 1, 1, 1])

feature_planes = tf.concat(
[
@@ -55,11 +56,12 @@

if use_symmetry:
# Interleave horizontally flipped position
feature_planes_shape = [-1] + feature_planes.shape.as_list()[1:]
feature_planes_shape = [-1
] + feature_planes.shape.as_list()[1:]
flipped = tf.reverse(feature_planes, axis=[3])
feature_planes = tf.reshape(
tf.stack([feature_planes, flipped], axis=1), feature_planes_shape
)
tf.stack([feature_planes, flipped], axis=1),
feature_planes_shape)

with tf.name_scope("conv_layers"):
if self.gpu_available():
@@ -106,9 +108,9 @@
name="final_conv",
)
disk_bias = tf.get_variable("disk_bias", shape=[TOTAL_DISKS])
self.conv_output = tf.add(
tf.contrib.layers.flatten(final_conv), disk_bias, name="conv_output"
)
self.conv_output = tf.add(tf.contrib.layers.flatten(final_conv),
disk_bias,
name="conv_output")

self.conv_layers = [conv1, conv2, conv3, self.conv_output]

@@ -119,7 +121,8 @@ def gpu_available(self):
@property
def variables(self):
# Add '/' to stop network-1 containing network-10 variables
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope + "/")
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
self.scope + "/")

def assign(self, other):
return [
@@ -135,12 +138,11 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):

with tf.name_scope("policy"):
self.temperature = tf.placeholder_with_default(
temperature, (), name="temperature"
)
temperature, (), name="temperature")

disk_logits = tf.divide(
self.conv_output, self.temperature, name="disk_logits"
)
disk_logits = tf.divide(self.conv_output,
self.temperature,
name="disk_logits")

if use_symmetry:
# Calculate average of actual and horizontally flipped position
@@ -151,7 +153,9 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
)
disk_logits = tf.reshape(
tf.reduce_mean(
tf.concat([normal, tf.reverse(flipped, axis=[3])], axis=1),
tf.concat(
[normal, tf.reverse(flipped, axis=[3])],
axis=1),
axis=1,
),
[-1, TOTAL_DISKS],
Expand All @@ -161,10 +165,8 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
# - Legal moves have positive logits
# - Illegal moves have -ILLEGAL_PENALTY logits
legal_moves = tf.contrib.layers.flatten(self.legal_moves)
legal_disk_logits = (
tf.nn.relu(disk_logits) * legal_moves
+ (legal_moves - 1) * ILLEGAL_PENALTY
)
legal_disk_logits = (tf.nn.relu(disk_logits) * legal_moves +
(legal_moves - 1) * ILLEGAL_PENALTY)

self.policy = tf.nn.softmax(legal_disk_logits, name="policy")
self.sample_move = tf.squeeze(
@@ -203,10 +205,12 @@ def __init__(self, scope, use_symmetry=False):

if use_symmetry:
# Calculate average of actual and horizontally flipped position
self.value = tf.reduce_mean(
tf.reshape(value, [-1, 2]), axis=1, name="value"
)
self.value = tf.reduce_mean(tf.reshape(value, [-1, 2]),
axis=1,
name="value")
else:
self.value = tf.squeeze(value, axis=1, name="value")

self.value_layers = self.conv_layers + [fully_connected, self.value]
self.value_layers = self.conv_layers + [
fully_connected, self.value
]
96 changes: 46 additions & 50 deletions policy_training.py
@@ -23,42 +23,42 @@ def __init__(self, config):
self.config = config
self.run_dir = util.run_directory(config)

self.session = tf.Session(
config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
)
self.session = tf.Session(config=tf.ConfigProto(
gpu_options=tf.GPUOptions(allow_growth=True)))

self.policy_network = PolicyNetwork("policy")
self.policy_player = PolicyPlayer(self.policy_network, self.session)
util.restore_or_initialize_network(
self.session, self.run_dir, self.policy_network
)
util.restore_or_initialize_network(self.session, self.run_dir,
self.policy_network)

# Train ops
self.create_train_op(self.policy_network)
self.writer = tf.summary.FileWriter(self.run_dir)
util.restore_or_initialize_scope(
self.session, self.run_dir, self.training_scope.name
)
util.restore_or_initialize_scope(self.session, self.run_dir,
self.training_scope.name)

self.opponents = Opponents(
[RandomPlayer(), RandomThreatPlayer(), MaxThreatPlayer()]
)
[RandomPlayer(),
RandomThreatPlayer(),
MaxThreatPlayer()])
self.opponents.restore_networks(self.session, self.run_dir)

def create_train_op(self, policy_network):
with tf.variable_scope("policy_training") as self.training_scope:
self.move = tf.placeholder(tf.int32, shape=[None], name="move")
self.result = tf.placeholder(tf.float32, shape=[None], name="result")
self.result = tf.placeholder(tf.float32,
shape=[None],
name="result")

policy = tf.reshape(policy_network.policy, [-1, HEIGHT, WIDTH])
move = tf.expand_dims(tf.one_hot(self.move, WIDTH), axis=1)
turn = util.turn_win(policy_network.turn)
move_probability = tf.reduce_sum(policy * move, axis=[1, 2])

result_loss = -tf.reduce_mean(tf.log(move_probability) * turn * self.result)
result_loss = -tf.reduce_mean(
tf.log(move_probability) * turn * self.result)
entropy_regularisation = -config.entropy * tf.reduce_mean(
policy_network.entropy
)
policy_network.entropy)
loss = result_loss + entropy_regularisation

optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
@@ -95,17 +95,20 @@ def save(self):

def play_games(self, opponent):
# Create games
games = incomplete_games = [Game() for _ in range(self.config.batch_size)]
games = incomplete_games = [
Game() for _ in range(self.config.batch_size)
]

# Let opponent play first in half of the games
self.play_move(games[0 : len(games) // 2], opponent)
self.play_move(games[0:len(games) // 2], opponent)
player = self.policy_player

while incomplete_games:
self.play_move(incomplete_games, player)
player = self.policy_player if player != self.policy_player else opponent
incomplete_games = [
game for game in incomplete_games if not game.position.gameover()
game for game in incomplete_games
if not game.position.gameover()
]

return games
@@ -154,7 +157,8 @@ def train_games(self, opponent, games):

def process_results(self, opponent, games, step, summary):
win_rate = np.mean([game.policy_player_score for game in games])
average_moves = sum(len(game.moves) for game in games) / self.config.batch_size
average_moves = sum(len(game.moves)
for game in games) / self.config.batch_size

opponent_summary = tf.Summary()
opponent_summary.value.add(
@@ -171,16 +175,13 @@ def process_results(self, opponent, games, step, summary):

self.opponents.update_win_rate(opponent, win_rate)

print(
"Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves"
% (
step,
opponent.name,
win_rate,
self.opponents.win_rates[opponent],
average_moves,
)
)
print("Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves" % (
step,
opponent.name,
win_rate,
self.opponents.win_rates[opponent],
average_moves,
))

def create_new_opponent(self, name):
# Create clone of policy_player
@@ -211,7 +212,8 @@ def decrease_win_rates(self):

def update_win_rate(self, opponent, win_rate):
# Win rate is a moving average
self.win_rates[opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1
self.win_rates[
opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1

def all_beaten(self):
result = True
@@ -222,32 +224,25 @@ def all_beaten(self):
def choose_opponent(self):
# More difficult opponents are chosen more often
win_rates = np.maximum(list(self.win_rates.values()), 0.1)
probs = (1 / win_rates ** 2) - 1
probs = (1 / win_rates**2) - 1
normalised_probs = probs / probs.sum()
return np.random.choice(list(self.win_rates.keys()), p=normalised_probs)
return np.random.choice(list(self.win_rates.keys()),
p=normalised_probs)

def next_network_name(self):
network_opponents = len(
[
opponent
for opponent in self.win_rates.keys()
if type(opponent) == PolicyPlayer
]
)
network_opponents = len([
opponent for opponent in self.win_rates.keys()
if type(opponent) == PolicyPlayer
])
return "network-%d" % (network_opponents + 1)

def save_opponent_stats(self, run_dir):
with open(os.path.join(run_dir, "opponents"), "w") as f:
f.write(
"\n".join(
[
opponent.name + " " + str(win_rate)
for opponent, win_rate in sorted(
self.win_rates.items(), key=lambda x: x[1]
)
]
)
)
f.write("\n".join([
opponent.name + " " + str(win_rate)
for opponent, win_rate in sorted(self.win_rates.items(),
key=lambda x: x[1])
]))

def restore_networks(self, session, run_dir):
opponents_file = os.path.join(run_dir, "opponents")
@@ -292,7 +287,8 @@ def move(self, move, policy_player_turn=False):
self.positions.append(self.position)
if self.position.gameover():
self.result = self.position.result
self.policy_player_score = float(policy_player_turn) if self.result else 0.5
self.policy_player_score = float(
policy_player_turn) if self.result else 0.5


def main(_):
