Bump tensorflow from 1.13.1 to 1.15.0 in /server #3

Open · wants to merge 69 commits into base: master

Commits (69)
e1af283
updated docker requirements (security update)
tbinetruy May 12, 2019
02a9271
linting
tbinetruy May 19, 2019
12f0b15
color beta observable vehicles
tbinetruy May 19, 2019
bd86285
added reward that penalizes observable vehicles idled for too long
tbinetruy May 19, 2019
699a69c
linting
tbinetruy May 19, 2019
925c624
restricted penalize min speed to observable vehicles
tbinetruy May 19, 2019
84fd501
implemented issyenv3; includes penalize idle in reward
tbinetruy May 19, 2019
5fced9c
updated issyenv2 reward docstring
tbinetruy May 19, 2019
a59060b
updated issy.py experiment params
tbinetruy May 19, 2019
f92aa90
created doc outline + started filling up sections
tbinetruy May 20, 2019
bf3441c
added reward and actions descriptions
tbinetruy May 21, 2019
151c1ec
wording
tbinetruy May 21, 2019
7681590
numbered equations
tbinetruy May 21, 2019
cd26f66
spelling / wording
tbinetruy May 21, 2019
968b47d
added outils section
tbinetruy May 21, 2019
7e5c6a2
quick edits to docs
tbinetruy May 21, 2019
f6b20fe
added img to outils and clarifications
tbinetruy May 21, 2019
946ec4e
made figs side by side
tbinetruy May 21, 2019
1be28fa
spelling + pictures
tbinetruy May 21, 2019
f5928fe
Ohhh !!!
tbinetruy May 22, 2019
35dccfa
OHHHH 2 !!!
tbinetruy May 22, 2019
5ddde69
notation quick fix
tbinetruy May 22, 2019
70430a8
Ohlala part1
margauxRodrigues May 22, 2019
04a8abb
Minor spelling correction
margauxRodrigues May 22, 2019
d434058
Other minor syntax corrections
margauxRodrigues May 22, 2019
f91e5d2
Study district modification
margauxRodrigues May 22, 2019
0697bcc
added tomtom data section img
tbinetruy May 22, 2019
ebae305
added flow chart pngs to repo
tbinetruy May 22, 2019
5ef10ca
added code and clarifications to the Actions section
tbinetruy May 22, 2019
c6d43a1
ohhh 3
tbinetruy May 22, 2019
77946a9
added dqn model descr
tbinetruy May 22, 2019
dec85d9
added tex and pdf files
tbinetruy May 23, 2019
db7ebf1
updated the flow folder
May 28, 2019
5071e59
timer for traffic light
Jun 5, 2019
ab14679
updated penalize max wait params
tbinetruy Jun 6, 2019
cb23591
created mini ray cluster api
tbinetruy Jun 6, 2019
e572033
implemented ray cluster params data struct
tbinetruy Jun 6, 2019
f341e58
linting
tbinetruy Jun 6, 2019
85fd158
implemented PPO alg
tbinetruy Jun 6, 2019
5ef7605
adapted reward to ppo alg
tbinetruy Jun 11, 2019
07bedbd
wip tl timer
tbinetruy Jun 11, 2019
b890771
added setup sumo on school machines
tbinetruy Jun 11, 2019
9d62b4d
added sumo tools
tbinetruy Jun 11, 2019
a4d6670
fucking A
tbinetruy Jun 13, 2019
edd12a4
doced
tbinetruy Jun 13, 2019
41c1054
updated cluster setup
tbinetruy Jun 13, 2019
6a12bd0
parametrized tl_constraint in env_params
tbinetruy Jun 13, 2019
0930ec4
fixed obs_tl_wait_steps last bug
tbinetruy Jun 13, 2019
4e9ff0c
removed identity action from DQN
tbinetruy Jun 13, 2019
fbb19c6
linting
tbinetruy Jun 13, 2019
d70e6ea
created issyenv4
tbinetruy Jun 13, 2019
5e01467
doc'ed issyenv4
tbinetruy Jun 13, 2019
a3f429e
parametrized sim_step
tbinetruy Jun 13, 2019
4ef0f0d
implemented obs veh acc
tbinetruy Jun 13, 2019
7e14c44
added veh acc to issyenv4
tbinetruy Jun 13, 2019
b5293eb
linting
tbinetruy Jun 13, 2019
a0ad219
added acceleration based reward on issyenv4
tbinetruy Jun 14, 2019
9a2b671
updated comments
tbinetruy Jun 14, 2019
70c3a35
refactored states
tbinetruy Jun 14, 2019
c9fe25e
removed obselete envs
tbinetruy Jun 14, 2019
d132779
refactored issyenv1
tbinetruy Jun 14, 2019
5cf1d5e
fixed acceleration states
tbinetruy Jun 14, 2019
4c167d6
keep same column order in state matrix
tbinetruy Jun 14, 2019
741ebe3
use IssyEnv1 in issy.py
tbinetruy Jun 14, 2019
1cce4f1
documented
tbinetruy Jun 14, 2019
bdeb4c1
encoded acceleration in state matrix
tbinetruy Jun 14, 2019
c3971b3
added comments
tbinetruy Jun 14, 2019
56bfa69
added max tl state
tbinetruy Jun 20, 2019
4e2a142
Bump tensorflow from 1.13.1 to 1.15.0 in /server
dependabot[bot] Dec 16, 2019
15 changes: 15 additions & 0 deletions flow/.vscode/launch.json
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python : Fichier actuel",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}
209 changes: 190 additions & 19 deletions flow/BaseIssyEnv.py
@@ -1,12 +1,14 @@
import itertools
import numpy as np
from collections import OrderedDict

from gym.spaces.box import Box
from gym.spaces.discrete import Discrete

from flow.envs import Env

from Rewards import Rewards
from States import States


class BaseIssyEnv(Env):
@@ -36,31 +38,159 @@ class BaseIssyEnv(Env):

def __init__(self, env_params, sim_params, scenario, simulator='traci'):
super().__init__(env_params, sim_params, scenario, simulator)
self.beta = env_params.get_additional_param("beta")
self.tl_constraint_min = env_params.get_additional_param(
"tl_constraint_min")
self.tl_constraint_max = env_params.get_additional_param(
"tl_constraint_max")
self.action_spec = env_params.get_additional_param("action_spec")
self.algorithm = env_params.get_additional_param("algorithm")
self.sim_step = env_params.get_additional_param("sim_step")
self.model_params = dict(beta=self.beta, )
self.rewards = Rewards(self.k, self.action_spec)
self.states = States(self.k, self.beta)

self._init_obs_veh_acc()
self._init_obs_veh_wait_steps()
self._init_obs_tl_wait_steps()

# Used for debug purposes
self.current_timestep = 0

def _init_obs_veh_acc(self):
"""Initializes the data structures that will store vehicle speeds and
accelerations"""
placeholder = 0.
self._obs_veh_vel = OrderedDict([('human_' + str(i), placeholder)
for i in range(self.beta)])
self.obs_veh_acc = OrderedDict([('human_' + str(i), placeholder)
for i in range(self.beta)])

def _update_obs_veh_acc(self):
"""Updates the observed vehicle speed and acceleration data structures.
We do so by using an ordered dict to maintain column order across
timesteps. When vehicles are being re-spawned, we set their
acceleration to 0."""
placeholder = 0.
speed_odict = OrderedDict([('human_' + str(i), placeholder)
for i in range(self.beta)])
acc_odict = OrderedDict([('human_' + str(i), placeholder)
for i in range(self.beta)])

for id in self.get_observable_veh_ids():
speed_odict[id] = self.k.vehicle.get_speed(id)

for i, id in enumerate(self.get_observable_veh_ids()):
acc_odict[id] = (speed_odict[id] -
self._obs_veh_vel[id]) / self.sim_step

self.obs_veh_acc = acc_odict
self._obs_veh_vel = speed_odict
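The acceleration update above is a first-order finite difference of the stored speeds. A minimal standalone sketch of the same bookkeeping (the `beta`, `sim_step`, and `human_<i>` names follow the diff; the speed values are made up, and a vehicle absent from the current step keeps the zero placeholder, as a re-spawning vehicle would):

```python
from collections import OrderedDict

beta = 3          # number of observable vehicles
sim_step = 0.1    # simulation step in seconds

# Previous-step speeds, keyed by vehicle id to keep column order stable.
prev_vel = OrderedDict(('human_' + str(i), 0.) for i in range(beta))

# Speeds reported by the simulator this step (made-up; human_2 is respawning).
current_speeds = {'human_0': 1.0, 'human_1': 2.5}

speed = OrderedDict(('human_' + str(i), 0.) for i in range(beta))
acc = OrderedDict(('human_' + str(i), 0.) for i in range(beta))

for veh_id, v in current_speeds.items():
    speed[veh_id] = v
    # Finite-difference acceleration; respawning vehicles keep acc = 0.
    acc[veh_id] = (speed[veh_id] - prev_vel[veh_id]) / sim_step

prev_vel = speed
print(list(acc.items()))
```

Keeping both dicts ordered means the state matrix keeps the same column order across timesteps, which a later commit in this PR ("keep same column order in state matrix") relies on.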

def _init_obs_veh_wait_steps(self):
"""Initializes attributes that will store the number of steps stayed
idle by the beta observable vehicles"""

# Because Sumo has not instantiated vehicles at this stage, we manually
# generate the name of controllable vehicles according to the Sumo
# naming convention.
# We store this list in an attribute since it is needed when updating
# `self.obs_veh_wait_steps` in the update loop and
# `self.k.vehicles.get_ids` will not list vehicles that are being re-
# routed (see `self._reroute_if_final_edge`).
self._all_obs_veh_names = ['human_' + str(i) for i in range(self.beta)]

# We instantiate a dictionary with veh_ids as keys and time steps spent
# idled as values. We set all values to 0 since this is an init.
self.obs_veh_wait_steps = {
veh_id: 0
for veh_id in self._all_obs_veh_names
}

def _init_obs_tl_wait_steps(self):
"""Initializes attributes that will store the number of steps stayed
idle by the traffic lights"""

# Contrary to the observed vehicles, the traffic lights are
# already named.
self._all_tl_names = self.action_spec.keys()

# We instantiate a dictionary with tl_ids as keys and time steps spent
# idled as values. We set all values to 0 since this is an init.
self.obs_tl_wait_steps = {
tl_id: {
'current_state': '',
'timer': 0
}
for tl_id in self._all_tl_names
}
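Under these two initializers, the bookkeeping stores look as follows for a hypothetical two-vehicle, two-light setup (the light ids and state strings here are illustrative, not from the repo):

```python
beta = 2
# Hypothetical action_spec: each light maps to its allowed state strings.
action_spec = {'center0': ('GrGr', 'rGrG'), 'center1': ('GG', 'rr')}

all_obs_veh_names = ['human_' + str(i) for i in range(beta)]

# Per-vehicle idle counters, all zero at init.
obs_veh_wait_steps = {veh_id: 0 for veh_id in all_obs_veh_names}

# Per-light state/timer records, all zeroed at init.
obs_tl_wait_steps = {
    tl_id: {'current_state': '', 'timer': 0}
    for tl_id in action_spec.keys()
}

print(obs_veh_wait_steps)
print(obs_tl_wait_steps)
```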

def map_action_to_tl_states(self, rl_actions):
"""Maps an rl_action list to new traffic light states based on
`action_spec` or keeps current traffic light states as they are.

Since the shape of `rl_action` depends on `self.algorithm`, the
mapping from actions to new states is implemented for each algorithm.

Parameters
----------
rl_actions:
PPO: [float] - list of action probabilities of length
`self.get_num_actions()`. TODO: For the moment, only binary
intersections are handled.
DQN: int - action number

Returns
-------
List of strings of length `len(self.action_spec.keys())` containing
the new state configuration for each intersection.
"""
new_state = []
if self.algorithm == "DQN":
# identity_action = [
# tuple(
# self.k.traffic_light.get_state(id)
# for id in self.action_spec.keys())
# ]
all_actions = list(
itertools.product(
*list(self.action_spec.values()))) # + identity_action
new_state = all_actions[rl_actions]
elif self.algorithm == "PPO":
new_state = [
v[int(rl_actions[i])]
for i, v in enumerate(list(self.action_spec.values()))
]
else:
raise NotImplementedError

# Don't update traffic lights that have not exceeded the timer
new_state = list(new_state)
for i, tl_id in enumerate(self.action_spec.keys()):
current_state = self.k.traffic_light.get_state(tl_id)
timer_value = self.obs_tl_wait_steps[tl_id]['timer']
if timer_value < self.tl_constraint_min:
new_state[i] = current_state
else:
# Pick new state if tl state hasn't changed in a while
cond_A = timer_value > self.tl_constraint_max
cond_B = new_state[i] == current_state
if cond_A and cond_B:
possible_states = list(self.action_spec[tl_id]) # copy
possible_states.remove(current_state)
num_states = len(possible_states)
if num_states:
new_state_index = np.random.randint(num_states)
new_state[i] = possible_states[new_state_index]

# Update state and timer if state changed
if new_state[i] != current_state:
self.obs_tl_wait_steps[tl_id] = {
'current_state': new_state[i],
'timer': 0
}

return new_state
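The PPO branch plus the timer constraint can be exercised in isolation. A sketch under assumed names (`tl0`, the state strings, and the threshold values are hypothetical; the structure mirrors the method above, minus the bookkeeping side effects):

```python
import numpy as np

action_spec = {'tl0': ('GrGr', 'rGrG')}       # hypothetical intersection
tl_constraint_min, tl_constraint_max = 3, 50  # hypothetical step thresholds

def map_ppo_action(rl_actions, current, timers):
    """Map per-intersection action floats to states, honoring the timers."""
    # PPO: one float per intersection, cast to an index into its states.
    new_state = [
        v[int(rl_actions[i])]
        for i, v in enumerate(action_spec.values())
    ]
    for i, tl_id in enumerate(action_spec.keys()):
        timer = timers[tl_id]
        if timer < tl_constraint_min:
            # Too soon to switch: keep the current state.
            new_state[i] = current[tl_id]
        elif timer > tl_constraint_max and new_state[i] == current[tl_id]:
            # Stuck too long: force a different state at random.
            options = [s for s in action_spec[tl_id] if s != current[tl_id]]
            if options:
                new_state[i] = options[np.random.randint(len(options))]
    return new_state

# A light that just switched is held in place...
print(map_ppo_action([1.0], {'tl0': 'GrGr'}, {'tl0': 1}))   # ['GrGr']
# ...while one past the minimum is allowed to change.
print(map_ppo_action([1.0], {'tl0': 'GrGr'}, {'tl0': 10}))  # ['rGrG']
```

Note the two constraints pull in opposite directions: `tl_constraint_min` debounces rapid switching, while `tl_constraint_max` forces exploration out of a frozen state.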

def get_num_traffic_lights(self):
"""Counts the number of traffic lights by summing
@@ -76,12 +206,15 @@ def get_num_traffic_lights(self):

def get_num_actions(self):
"""Calculates the number of possible actions by counting the
traffic light states based on `self.action_spec`.

In the DQN case, it counts the cardinality of the cartesian product of
all traffic light states. It also adds 1 to that product to account
for the "identity" action which keeps the traffic light states as they
were in the last timestep.

In the PPO case, it returns the number of intersections.

Returns
-------
Number of actions (int)
@@ -90,9 +223,9 @@ def get_num_actions(self):
for k in self.action_spec.keys():
count *= len(self.action_spec[k])
if self.algorithm == "DQN":
return count  # + 1
elif self.algorithm == "PPO":
return len(self.action_spec.keys())
else:
raise NotImplementedError
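For a hypothetical two-intersection `action_spec`, the two counting schemes above give different action-space sizes:

```python
# Hypothetical spec: tl0 has 2 possible states, tl1 has 3.
action_spec = {'tl0': ('GrGr', 'rGrG'), 'tl1': ('GG', 'yy', 'rr')}

# DQN: one discrete action per element of the cartesian product of states.
dqn_actions = 1
for states in action_spec.values():
    dqn_actions *= len(states)   # 2 * 3

# PPO: one output per intersection.
ppo_actions = len(action_spec)

print(dqn_actions, ppo_actions)
```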

@@ -147,14 +280,52 @@ def _apply_rl_actions(self, rl_actions):
new_state = "G" * len(old_state)
self.k.traffic_light.set_state(tl_id, new_state)

def _update_obs_wait_steps(self):
"""This method updates `self.obs_veh_wait_steps`.

Ex: If human_1 has been idled for 5 timesteps and human_2 is moving at
1km/h, then `self.obs_veh_wait_steps` = {'human_1': 5, 'human_2': 0}"""
self.obs_veh_wait_steps = {
veh_id: 0 if self.k.vehicle.get_speed(veh_id) else
self.obs_veh_wait_steps[veh_id] + 1
for veh_id in self.get_observable_veh_ids()
}

# Because when vehicles are being rerouted, they will not appear in the
# list returned by `self.get_observable_veh_ids`, they will be left out
# of `self.obs_veh_wait_steps`. We patch the dictionary as follows to
# prevent key errors.
for k in self._all_obs_veh_names:
if k not in self.obs_veh_wait_steps:
self.obs_veh_wait_steps[k] = 0
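The idle-counter update and the rerouting patch can be reproduced standalone, matching the docstring's intent (idle vehicles accumulate steps, moving vehicles reset to zero). The `speeds` dict here is a stand-in for `self.k.vehicle.get_speed`, with made-up values:

```python
all_obs_veh_names = ['human_0', 'human_1', 'human_2']
obs_veh_wait_steps = {'human_0': 5, 'human_1': 0, 'human_2': 3}

# Stand-in for the simulator query: human_2 is being rerouted and is
# therefore absent from the observable ids this step.
speeds = {'human_0': 0.0, 'human_1': 1.2}
observable_ids = list(speeds.keys())

# Idle vehicles accumulate steps; moving vehicles reset to zero.
obs_veh_wait_steps = {
    veh_id: 0 if speeds[veh_id] else obs_veh_wait_steps[veh_id] + 1
    for veh_id in observable_ids
}

# Patch in rerouted vehicles so later lookups never raise KeyError.
for veh_id in all_obs_veh_names:
    if veh_id not in obs_veh_wait_steps:
        obs_veh_wait_steps[veh_id] = 0

print(obs_veh_wait_steps)  # {'human_0': 6, 'human_1': 0, 'human_2': 0}
```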

def _increment_obs_tl_wait_steps(self):
"""This method increments `self.obs_tl_wait_steps`."""
for tl_id in self.action_spec.keys():
self.obs_tl_wait_steps[tl_id]['timer'] += 1

def additional_command(self):
"""Gets executed at each time step.

- updates how long observable vehicles have been waiting for.
- updates how long traffic lights have been in the same state for.
- inserts vehicles that are on the exit edge back on their
entrance edge.
- colors the beta observable vehicles on sumo's gui.

See parent class for more information."""
self._update_obs_wait_steps()
self._increment_obs_tl_wait_steps()
self._update_obs_veh_acc()

for veh_id in self.k.vehicle.get_ids():
self._reroute_if_final_edge(veh_id)

# color beta observable vehicles
if 'human' in veh_id:
self.k.vehicle.set_color(veh_id, color=(255, 0, 0))

# Used for debug purposes
self.current_timestep += 1
