merge changes (cts198859#3)
* update

* update traffic dynamics
cts198859 authored May 21, 2018
1 parent 25fdff3 commit fddb9cf
Showing 23 changed files with 15,205 additions and 3,198 deletions.
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/result_plot-checkpoint.ipynb
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
5 changes: 5 additions & 0 deletions README.md
@@ -27,6 +27,11 @@ python3 main.py --base-dir [base_dir] train --config-dir [config_dir] --test-mod
~~~
`no_test` is suggested if no testing is needed during training, since it is time-consuming.

To access tensorboard during training, run
~~~
tensorboard --logdir=[base_dir]/log
~~~

To evaluate and compare trained agents, run
~~~
python3 main.py --base-dir [base_dir] evaluate --agents [agent names] --evaluate-metrics num_arrival_car --evaluate-seeds [seeds]
4 changes: 3 additions & 1 deletion agents/models.py
@@ -7,6 +7,7 @@
class A2C:
def __init__(self, n_s, n_a, total_step, model_config, seed=0):
# load parameters
self.name = 'a2c'
self.n_agent = 1
self.reward_clip = model_config.getfloat('reward_clip')
self.reward_norm = model_config.getfloat('reward_norm')
@@ -98,7 +99,7 @@ def load(self, model_dir, checkpoint=None):
else:
save_file = 'checkpoint-' + str(int(checkpoint))
if save_file is not None:
self.saver.restore(sess, model_dir + save_file)
self.saver.restore(self.sess, model_dir + save_file)
logging.info('Checkpoint loaded: %s' % save_file)
return True
logging.error('Can not find old checkpoint for %s' % model_dir)
@@ -125,6 +126,7 @@ def add_transition(self, ob, action, reward, value, done):
class MultiA2C(A2C):
def __init__(self, n_s_ls, n_a_ls, total_step,
model_config, seed=0):
self.name = 'ma2c'
self.agents = []
self.n_agent = len(n_s_ls)
self.reward_clip = model_config.getfloat('reward_clip')
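For context on the `load()` change above: the restore call previously referenced a bare `sess` name instead of the session stored on the agent, so loading a saved checkpoint would fail at runtime. A minimal TF1-style sketch of the intended save/restore pattern (paths and variable names here are illustrative, not from the repo):
~~~
import os
import tensorflow as tf

# Toy graph plus a saver, mirroring how A2C keeps its own tf.Session.
w = tf.get_variable('w', shape=[2, 2])
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

model_dir = '/tmp/a2c_model/'  # illustrative path
os.makedirs(model_dir, exist_ok=True)
saver.save(sess, model_dir + 'checkpoint', global_step=0)

# The fix in load(): restore into the session that owns the graph
# (self.sess on the agent), not an undefined local name.
saver.restore(sess, model_dir + 'checkpoint-0')
~~~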
2 changes: 1 addition & 1 deletion config/config_global.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
2 changes: 1 addition & 1 deletion config/config_local.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
2 changes: 1 addition & 1 deletion config/config_neighbor.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
72 changes: 54 additions & 18 deletions envs/env.py
@@ -8,9 +8,10 @@
import subprocess
from sumolib import checkBinary
import traci
import xml.etree.cElementTree as ET

DEFAULT_PORT = 8000

SEC_IN_MS = 1000

class Phase:
def __init__(self):
@@ -38,8 +39,7 @@ def __init__(self, name, neighbor=[], control=False):

class TrafficSimulator:
def __init__(self, config, output_path, is_record, record_stats, port=0):
scenario = config.get('scenario')
self.name = scenario
self.name = config.get('scenario')
self.seed = config.getint('seed')
self.control_interval_sec = config.getint('control_interval_sec')
self.yellow_interval_sec = config.getint('yellow_interval_sec')
@@ -48,7 +48,6 @@ def __init__(self, config, output_path, is_record, record_stats, port=0):
self.sim_thread = port
self.obj = config.get('objective')
self.data_path = config.get('data_path')
self.scenario = scenario
self.coop_level = config.get('coop_level')
self.coop_gamma = config.getfloat('coop_gamma')
self.cur_episode = 0
@@ -59,11 +58,12 @@ def __init__(self, config, output_path, is_record, record_stats, port=0):
self.train_mode = True
test_seeds = config.get('test_seeds').split(',')
test_seeds = [int(s) for s in test_seeds]
self.init_data(is_record, record_stats, output_path)
self.init_test_seeds(test_seeds)
self._init_map()
self._init_sim()
self._init_nodes()
self.init_data(is_record, record_stats, output_path)
self.init_test_seeds(test_seeds)
self.terminate()

def _get_cross_phase(self, action, node, phase_type):
phase_num = self.nodes[node].phase_num
@@ -120,9 +120,9 @@ def _get_state(self):

# get the state vectors
for node in self.control_nodes:
if (self.coop_level == 'global') or (self.coop_level == 'local'):
if self.coop_level != 'neighbor':
state.append(self.nodes[node].state)
elif self.coop_level == 'neighbor':
else:
cur_state = [self.nodes[node].state]
# include both states and fingerprints of neighbors
for nnode in self.nodes[node].neighbor:
@@ -203,14 +203,23 @@ def _init_policy(self):
policy.append(np.array([p] * self.nodes[node].phase_num))
return policy

def _init_sim(self):
def _init_sim(self, gui=False):
sumocfg_file = self._init_sim_config()
command = [checkBinary('sumo'), '-c', sumocfg_file]
if gui:
app = 'sumo-gui'
else:
app = 'sumo'
command = [checkBinary(app), '-c', sumocfg_file]
command += ['--seed', str(self.seed)]
command += ['--remote-port', str(self.port)]
command += ['--no-step-log', 'True']
command += ['--time-to-teleport', '-1']
command += ['--time-to-teleport', '-1'] # disable teleport
command += ['--no-warnings', 'True']
command += ['--duration-log.disable', 'True']
# collect trip info if necessary
if self.is_record:
command += ['--tripinfo-output',
self.output_path + ('%s_%s_trip.xml' % (self.name, self.coop_level))]
subprocess.Popen(command)
self.sim = traci.connect(port=self.port)

@@ -269,6 +278,8 @@ def _measure_traffic_step(self):
num_out_car = self.sim.simulation.getArrivedNumber()
avg_waiting_time = np.mean([self.sim.vehicle.getWaitingTime(car) for car in cars])
avg_speed = np.mean(speeds)
# all trip-related measurements are not supported by traci,
# need to read from outputfile afterwards
cur_traffic = {'episode': self.cur_episode,
'time_sec': self.cur_sec,
'number_total_car': num_tot_car,
@@ -289,7 +300,7 @@ def _reset_state(self):
# prev action for yellow phase before each switch
if self.nodes[node].control:
self.nodes[node].prev_action = -1
# fingerprint is previous policy[:-1]
# fingerprint is previous policy[:-1]
self.nodes[node].num_fingerprint = self.nodes[node].phase_num - 1
num_state, speed_mask = self._get_cross_state_num(node)
self.nodes[node].state = np.zeros(num_state)
@@ -325,13 +336,33 @@ def _transfer_action(self, action):
action_ls.append(action)
return action_ls

def collect_tripinfo(self):
# read trip xml, has to be called externally to get complete file
trip_file = self.output_path + ('%s_%s_trip.xml' % (self.name, self.coop_level))
tree = ET.ElementTree(file=trip_file)
for child in tree.getroot():
cur_trip = child.attrib
cur_dict = {}
cur_dict['episode'] = self.cur_episode
cur_dict['id'] = cur_trip['id']
cur_dict['depart_sec'] = cur_trip['depart']
cur_dict['arrival_sec'] = cur_trip['arrival']
cur_dict['duration_sec'] = cur_trip['duration']
cur_dict['wait_step'] = cur_trip['waitSteps']
cur_dict['wait_sec'] = cur_trip['timeLoss']
self.trip_data.append(cur_dict)
# delete the current xml
cmd = 'rm ' + trip_file
subprocess.check_call(cmd, shell=True)

def init_data(self, is_record, record_stats, output_path):
self.is_record = is_record
self.record_stats = record_stats
self.output_path = output_path
if self.is_record:
self.traffic_data = []
self.control_data = []
self.trip_data = []
if self.record_stats:
self.car_num_stat = []
self.car_speed_stat = []
@@ -344,15 +375,18 @@ def output_data(self):
if not self.is_record:
logging.error('Env: no record to output!')
control_data = pd.DataFrame(self.control_data)
control_data.to_csv(self.output_path + 'control.csv')
control_data.to_csv(self.output_path + ('%s_%s_control.csv' % (self.name, self.coop_level)))
traffic_data = pd.DataFrame(self.traffic_data)
traffic_data.to_csv(self.output_path + 'traffic.csv')
traffic_data.to_csv(self.output_path + ('%s_%s_traffic.csv' % (self.name, self.coop_level)))
trip_data = pd.DataFrame(self.trip_data)
trip_data.to_csv(self.output_path + ('%s_%s_trip.csv' % (self.name, self.coop_level)))

def reset(self, test_ind=0):
self.terminate()
# have to terminate previous sim before calling reset
self._reset_state()
if not self.train_mode:
self.seed = self.test_seeds[test_ind]
# self._init_sim(gui=True)
self._init_sim()
# next environment random condition should be different
self.seed += 10
@@ -384,6 +418,7 @@ def step(self, action):
action_str = ','.join([str(int(a)) for a in action])
reward_str = ','.join([str(r) for r in reward])
cur_control = {'episode': self.cur_episode,
'time_sec': self.cur_sec,
'step': self.cur_sec / self.control_interval_sec,
'action': action_str,
'reward': reward_str}
@@ -392,7 +427,7 @@ def step(self, action):
if self.coop_level == 'global':
reward = global_reward
elif self.coop_level == 'local':
# global reward
# global reward
new_reward = [global_reward] * len(reward)
reward = np.array(new_reward)
elif self.coop_level == 'neighbor':
@@ -405,11 +440,12 @@ def step(self, action):
continue
if nnode in self.nodes[node].neighbor:
cur_reward += self.coop_gamma * reward[i]
else:
elif self.name == 'small_grid':
# in small grid, agent is at most 2 steps away
cur_reward += (self.coop_gamma ** 2) * reward[i]
# TODO: step decay in large grid
new_reward.append(cur_reward)
reward = np.array(new_reward)
# TODO: neighbor uses spatially discounted reward
return state, reward, done, global_reward

def update_fingerprint(self, policy):
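The `step()` changes above refine the `neighbor` cooperation level: each agent keeps its own reward, adds its neighbors' rewards discounted by `coop_gamma`, and, in the small grid only, adds non-neighbors' rewards discounted by `coop_gamma ** 2` (no agent there is more than two hops away; step decay for the large grid is still a TODO). A standalone sketch of that weighting, with assumed names and `rewards` ordered like `control_nodes`:
~~~
import numpy as np

def spatially_discounted_rewards(rewards, neighbor_map, node_names, coop_gamma):
    # Mirrors the neighbor-level reward in step() for the small grid:
    # own reward + coop_gamma * 1-hop rewards + coop_gamma**2 * 2-hop rewards.
    new_rewards = []
    for i, node in enumerate(node_names):
        cur = rewards[i]
        for j, other in enumerate(node_names):
            if other == node:
                continue
            if other in neighbor_map[node]:
                cur += coop_gamma * rewards[j]
            else:
                # small_grid assumption: every non-neighbor is exactly 2 hops away
                cur += (coop_gamma ** 2) * rewards[j]
        new_rewards.append(cur)
    return np.array(new_rewards)

# Illustrative topology (not the actual network file): nt1 borders nt2 and nt3.
neighbor_map = {'nt1': ['nt2', 'nt3'], 'nt2': ['nt1'], 'nt3': ['nt1']}
print(spatially_discounted_rewards([1.0, 0.5, 0.2], neighbor_map,
                                   ['nt1', 'nt2', 'nt3'], coop_gamma=0.9))
~~~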
42 changes: 33 additions & 9 deletions envs/small_grid_env.py
@@ -3,6 +3,7 @@
import matplotlib.pyplot as plt
import os
import seaborn as sns
import time
from envs.env import Phase, TrafficSimulator
from small_grid.data.build_file import gen_rou_file

@@ -17,6 +18,8 @@

STATE_MEAN_MASKS = {'in_car': False, 'in_speed': True, 'out_car': False}
STATE_NAMES = ['in_car', 'in_speed', 'out_car']
STATE_PHASE_MAP = {'nt1': [0, 1, 2], 'nt2': [1, 0], 'nt3': [1, 0],
'nt4': [1, 0], 'nt5': [1, 0], 'nt6': [1, 0]}


class SmallGridPhase(Phase):
@@ -36,8 +39,9 @@ def __init__(self):
self.phases = {2: two_phase, 3: three_phase}


class NaiveController:
class FixedController:
def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.name = 'naive'
self.phase_3 = 0
self.phase_2 = 0
self.num_3 = num_node_3phase
@@ -46,7 +50,7 @@ def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.step_3 = switch_step
self.step_2 = switch_step

def act(self, state=None):
def forward(self, ob=None, done=False, output_type=''):
if not self.step_3:
self.phase_3 = (self.phase_3 + 1) % 3
self.step_3 = self.switch_step - 1
@@ -59,8 +63,24 @@ def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.step_2 -= 1
return np.array([self.phase_3] * self.num_3 + [self.phase_2] * self.num_2)

def explore(self, state=None):
return self.act()

class SmallGridController:
def __init__(self, nodes):
self.name = 'naive'
self.nodes = nodes
self.phase = SmallGridPhase()

def forward(self, obs):
actions = []
for ob, node in zip(obs, self.nodes):
actions.append(self.greedy(ob, node))
return actions

def greedy(self, ob, node):
# hard code the mapping from state to number of cars
phases = STATE_PHASE_MAP[node]
in_cars = ob[:len(phases)]
return phases[np.argmax(in_cars)]


class SmallGridEnv(TrafficSimulator):
@@ -101,18 +121,22 @@ def plot_cdf(X, c='b', label=None):

if __name__ == '__main__':
config = configparser.ConfigParser()
config.read('./config/config.ini')
config.read('./config/config_local.ini')
base_dir = './output_result/'
if not os.path.exists(base_dir):
os.mkdir(base_dir)
env = SmallGridEnv(config['ENV_CONFIG'], 0, base_dir, is_record=False, record_stat=True)
env.reset()
controller = NaiveController()
env = SmallGridEnv(config['ENV_CONFIG'], 0, base_dir, is_record=True, record_stat=True)
ob = env.reset()
controller = SmallGridController()
rewards = []
while True:
_, reward, done = env.step(controller.act())
next_ob, reward, done, _ = env.step(controller.forward(ob))
rewards.append(np.mean(reward))
if done:
break
ob = next_ob
env.plot_stat(np.array(rewards))
env.terminate()
time.sleep(2)
env.collect_tripinfo()
env.output_data()
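As a worked illustration of `SmallGridController.greedy` above, assuming the first entries of each observation are the `in_car` counts for the approaches indexed by `STATE_PHASE_MAP` (that layout is inferred from `STATE_NAMES`, not spelled out in this diff):
~~~
import numpy as np

STATE_PHASE_MAP = {'nt1': [0, 1, 2], 'nt2': [1, 0], 'nt3': [1, 0],
                   'nt4': [1, 0], 'nt5': [1, 0], 'nt6': [1, 0]}

def greedy(ob, node):
    # pick the phase serving the incoming approach with the most cars
    phases = STATE_PHASE_MAP[node]
    in_cars = ob[:len(phases)]
    return phases[int(np.argmax(in_cars))]

# nt1 sees 3, 1 and 5 incoming cars; the third approach is busiest, so phase 2.
print(greedy(np.array([3.0, 1.0, 5.0, 0.2, 0.4]), 'nt1'))  # -> 2
~~~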