spxtr · mikedanese · Jan 30, 2017 · spxtr · Jan 30, 2017 · mikedanese
diff --git a/p3/ddq.py b/p3/ddq.py
@@ -0,0 +1,26 @@
+import numpy as np
+import tensorflow as tf
+
+class Decider(object):
+    """Placeholder policy: samples a sparse random 19-float action vector."""
+    def __init__(self, shape):
+        self.observation_shape = shape
+        self._action_op = None  # cached so act() adds ops to the graph once
+
+    def observation_batch_shape(self, batch_size):
+        return tuple([batch_size] + list(self.observation_shape))
+
+    def create_variables(self):
+        pass
+
+    def act(self, sess, observation):
+        """Run the sampling op in sess; observation is currently ignored."""
+        if self._action_op is None:
+            noise = tf.concat([
+                tf.random_uniform([13]),  # buttons
+                tf.reshape(tf.random_uniform([2, 2]), [4]),  # directions
+                tf.random_uniform([2]),  # sleeps
+            ], 0)
+            # relu(x - 0.95) zeroes ~95% of entries; the rest land in (0, 0.05).
+            self._action_op = tf.nn.relu(noise - 0.95)
+        return sess.run(self._action_op)
diff --git a/p3/fox.py b/p3/fox.py
@@ -1,11 +1,40 @@
-import p3.pad
+import logging
+
+from p3.pad import Button, Trigger, Stick
+from p3.ddq import Decider
+
+ZERO = 0.0000001
+BUTTONS = {
+    Button.A: 0,
+    Button.B: 1,
+    Button.X: 2,
+    Button.Y: 3,
+    Button.Z: 4,
+    Button.L: 5,
+    Button.R: 6,
+    Button.D_UP: 7,
+    Button.D_DOWN: 8,
+    Button.D_LEFT: 9,
+    Button.D_RIGHT: 10,
+}
+TRIGGERS = {
+    Trigger.L: 11,
+    Trigger.R: 12,
+}
+STICKS = {
+    Stick.MAIN: 13,
+    Stick.C: 15,
+}
 
 class Fox:
-    def __init__(self):
+    def __init__(self, pad):
+        self.pad = pad
         self.action_list = []
         self.last_action = 0
+        self.decider = Decider([])
+        self.key_pressed = dict()
 
-    def advance(self, state, pad):
+    def advance(self, sess, state):
         while self.action_list:
             wait, func, args = self.action_list[0]
             if state.frame - self.last_action < wait:
@@ -16,14 +45,28 @@ def advance(self, state, pad):
                     func(*args)
                 self.last_action = state.frame
         else:
-            # Eventually this will point at some decision-making thing.
-            self.shinespam(pad)
-
-    def shinespam(self, pad):
-        self.action_list.append((0, pad.tilt_stick, [p3.pad.Stick.MAIN, 0.5, 0.0]))
-        self.action_list.append((0, pad.press_button, [p3.pad.Button.B]))
-        self.action_list.append((1, pad.release_button, [p3.pad.Button.B]))
-        self.action_list.append((0, pad.tilt_stick, [p3.pad.Stick.MAIN, 0.5, 0.5]))
-        self.action_list.append((0, pad.press_button, [p3.pad.Button.X]))
-        self.action_list.append((1, pad.release_button, [p3.pad.Button.X]))
-        self.action_list.append((1, None, []))
+            self.apply(self.decider.act(sess, self.state_to_observation(state)))
+
+    def apply(self, action):
+        """Queue pad calls for one action vector (see BUTTONS/TRIGGERS/STICKS)."""
+        for stick, i in STICKS.items():
+            self.action_list.append((0, self.pad.tilt_stick, [stick, action[i], action[i+1]]))
+
+        # Nothing here releases a trigger, so always send the value, as with
+        # sticks. NOTE(review): assumes press_trigger accepts 0 -- confirm.
+        for trigger, i in TRIGGERS.items():
+            self.action_list.append((0, self.pad.press_trigger, [trigger, action[i]]))
+
+        pressed = [button for button, i in BUTTONS.items() if action[i] > ZERO]
+        for button in pressed:
+            logging.debug("press: %s", button)
+            self.action_list.append((0, self.pad.press_button, [button]))
+        # action[17] and action[18] each set the wait of an entry with no function.
+        self.action_list.append((int(action[17]*30+15), None, []))
+
+        for button in pressed:
+            self.action_list.append((0, self.pad.release_button, [button]))
+        self.action_list.append((int(action[18]*30+15), None, []))
+
+    def state_to_observation(self, state):
+        pass
diff --git a/p3/p3.py b/p3/p3.py
@@ -9,6 +9,8 @@
 import p3.state_manager
 import p3.stats
 
+import tensorflow as tf
+
 
 def find_dolphin_dir():
     """Attempts to find the dolphin user directory. None on failure."""
@@ -30,22 +32,24 @@ def write_locations(dolphin_dir, locations):
             print('Could not detect dolphin directory.')
             return
 
-def run(fox, state, sm, mw, pad, stats):
+def run(state, sm, mw, pad, stats):
     mm = p3.menu_manager.MenuManager()
-    while True:
-        last_frame = state.frame
-        res = next(mw)
-        if res is not None:
-            sm.handle(*res)
-        if state.frame > last_frame:
-            stats.add_frames(state.frame - last_frame)
-            start = time.time()
-            make_action(state, pad, mm, fox)
-            stats.add_thinking_time(time.time() - start)
+    fox = p3.fox.Fox(pad)
+    with tf.Session() as sess:
+        while True:
+            last_frame = state.frame
+            res = next(mw)
+            if res is not None:
+                sm.handle(*res)
+            if state.frame > last_frame:
+                stats.add_frames(state.frame - last_frame)
+                start = time.time()
+                make_action(state, pad, mm, fox, sess)
+                stats.add_thinking_time(time.time() - start)
 
-def make_action(state, pad, mm, fox):
+def make_action(state, pad, mm, fox, sess):
     if state.menu == p3.state.Menu.Game:
-        fox.advance(state, pad)
+        fox.advance(sess, state)
     elif state.menu == p3.state.Menu.Characters:
         mm.pick_fox(state, pad)
     elif state.menu == p3.state.Menu.Stages:
@@ -66,14 +70,12 @@ def main():
 
     stats = p3.stats.Stats()
 
-    fox = p3.fox.Fox()
-
     try:
         print('Start dolphin now. Press ^C to stop p3.')
         pad_path = dolphin_dir + '/Pipes/p3'
         mw_path = dolphin_dir + '/MemoryWatcher/MemoryWatcher'
         with p3.pad.Pad(pad_path) as pad, p3.memory_watcher.MemoryWatcher(mw_path) as mw:
-            run(fox, state, sm, mw, pad, stats)
+            run(state, sm, mw, pad, stats)
     except KeyboardInterrupt:
         print('Stopped')
         print(stats)

diff --git a/p3/pad.py b/p3/pad.py
@@ -8,13 +8,13 @@ class Button(enum.Enum):
     X = 2
     Y = 3
     Z = 4
-    START = 5
-    L = 6
-    R = 7
-    D_UP = 8
-    D_DOWN = 9
-    D_LEFT = 10
-    D_RIGHT = 11
+    L = 5
+    R = 6
+    D_UP = 7
+    D_DOWN = 8
+    D_LEFT = 9
+    D_RIGHT = 10
+    START = 11
 
 @enum.unique
 class Trigger(enum.Enum):