merge changes (cts198859#3)
* update

* update traffic dynamics
cts198859 authored May 21, 2018
1 parent 25fdff3 commit fddb9cf
Showing 23 changed files with 15,205 additions and 3,198 deletions.
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/result_plot-checkpoint.ipynb
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
5 changes: 5 additions & 0 deletions README.md
@@ -27,6 +27,11 @@ python3 main.py --base-dir [base_dir] train --config-dir [config_dir] --test-mod
~~~
`no_test` is suggested if no testing is needed during training, since it is time-consuming.

To access tensorboard during training, run
~~~
tensorboard --logdir=[base_dir]/log
~~~

To evaluate and compare trained agents, run
~~~
python3 main.py --base-dir [base_dir] evaluate --agents [agent names] --evaluate-metrics num_arrival_car --evaluate-seeds [seeds]
4 changes: 3 additions & 1 deletion agents/models.py
@@ -7,6 +7,7 @@
class A2C:
def __init__(self, n_s, n_a, total_step, model_config, seed=0):
# load parameters
self.name = 'a2c'
self.n_agent = 1
self.reward_clip = model_config.getfloat('reward_clip')
self.reward_norm = model_config.getfloat('reward_norm')
@@ -98,7 +99,7 @@ def load(self, model_dir, checkpoint=None):
else:
save_file = 'checkpoint-' + str(int(checkpoint))
if save_file is not None:
self.saver.restore(sess, model_dir + save_file)
self.saver.restore(self.sess, model_dir + save_file)
logging.info('Checkpoint loaded: %s' % save_file)
return True
logging.error('Can not find old checkpoint for %s' % model_dir)
@@ -125,6 +126,7 @@ def add_transition(self, ob, action, reward, value, done):
class MultiA2C(A2C):
def __init__(self, n_s_ls, n_a_ls, total_step,
model_config, seed=0):
self.name = 'ma2c'
self.agents = []
self.n_agent = len(n_s_ls)
self.reward_clip = model_config.getfloat('reward_clip')
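For context on the `load()` change above: the restore call previously referenced a bare `sess` name instead of the session stored on the agent, so loading a saved checkpoint would fail at runtime. A minimal TF1-style sketch of the intended save/restore pattern (paths and variable names here are illustrative, not from the repo):
~~~
import os
import tensorflow as tf

# Toy graph plus a saver, mirroring how A2C keeps its own tf.Session.
w = tf.get_variable('w', shape=[2, 2])
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

model_dir = '/tmp/a2c_model/'  # illustrative path
os.makedirs(model_dir, exist_ok=True)
saver.save(sess, model_dir + 'checkpoint', global_step=0)

# The fix in load(): restore into the session that owns the graph
# (self.sess on the agent), not an undefined local name.
saver.restore(sess, model_dir + 'checkpoint-0')
~~~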
2 changes: 1 addition & 1 deletion config/config_global.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
2 changes: 1 addition & 1 deletion config/config_local.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
2 changes: 1 addition & 1 deletion config/config_neighbor.ini
@@ -36,7 +36,7 @@ episode_length_sec = 7200
; the normalization is based on typical values in sim
norm_car_num = 5.0
norm_car_speed = 2.0
num_extra_car_per_hour = 1000
num_extra_car_per_hour = 2000
; objective is chosen from max_flow, min_stop, min_wait
objective = max_flow
scenario = small_grid
72 changes: 54 additions & 18 deletions envs/env.py
@@ -8,9 +8,10 @@
import subprocess
from sumolib import checkBinary
import traci
import xml.etree.cElementTree as ET

DEFAULT_PORT = 8000

SEC_IN_MS = 1000

class Phase:
def __init__(self):
@@ -38,8 +39,7 @@ def __init__(self, name, neighbor=[], control=False):

class TrafficSimulator:
def __init__(self, config, output_path, is_record, record_stats, port=0):
scenario = config.get('scenario')
self.name = scenario
self.name = config.get('scenario')
self.seed = config.getint('seed')
self.control_interval_sec = config.getint('control_interval_sec')
self.yellow_interval_sec = config.getint('yellow_interval_sec')
@@ -48,7 +48,6 @@ def __init__(self, config, output_path, is_record, record_stats, port=0):
self.sim_thread = port
self.obj = config.get('objective')
self.data_path = config.get('data_path')
self.scenario = scenario
self.coop_level = config.get('coop_level')
self.coop_gamma = config.getfloat('coop_gamma')
self.cur_episode = 0
@@ -59,11 +58,12 @@ def __init__(self, config, output_path, is_record, record_stats, port=0):
self.train_mode = True
test_seeds = config.get('test_seeds').split(',')
test_seeds = [int(s) for s in test_seeds]
self.init_data(is_record, record_stats, output_path)
self.init_test_seeds(test_seeds)
self._init_map()
self._init_sim()
self._init_nodes()
self.init_data(is_record, record_stats, output_path)
self.init_test_seeds(test_seeds)
self.terminate()

def _get_cross_phase(self, action, node, phase_type):
phase_num = self.nodes[node].phase_num
@@ -120,9 +120,9 @@ def _get_state(self):

# get the state vectors
for node in self.control_nodes:
if (self.coop_level == 'global') or (self.coop_level == 'local'):
if self.coop_level != 'neighbor':
state.append(self.nodes[node].state)
elif self.coop_level == 'neighbor':
else:
cur_state = [self.nodes[node].state]
# include both states and fingerprints of neighbors
for nnode in self.nodes[node].neighbor:
@@ -203,14 +203,23 @@ def _init_policy(self):
policy.append(np.array([p] * self.nodes[node].phase_num))
return policy

def _init_sim(self):
def _init_sim(self, gui=False):
sumocfg_file = self._init_sim_config()
command = [checkBinary('sumo'), '-c', sumocfg_file]
if gui:
app = 'sumo-gui'
else:
app = 'sumo'
command = [checkBinary(app), '-c', sumocfg_file]
command += ['--seed', str(self.seed)]
command += ['--remote-port', str(self.port)]
command += ['--no-step-log', 'True']
command += ['--time-to-teleport', '-1']
command += ['--time-to-teleport', '-1'] # disable teleport
command += ['--no-warnings', 'True']
command += ['--duration-log.disable', 'True']
# collect trip info if necessary
if self.is_record:
command += ['--tripinfo-output',
self.output_path + ('%s_%s_trip.xml' % (self.name, self.coop_level))]
subprocess.Popen(command)
self.sim = traci.connect(port=self.port)

@@ -269,6 +278,8 @@ def _measure_traffic_step(self):
num_out_car = self.sim.simulation.getArrivedNumber()
avg_waiting_time = np.mean([self.sim.vehicle.getWaitingTime(car) for car in cars])
avg_speed = np.mean(speeds)
# all trip-related measurements are not supported by traci,
# need to read from outputfile afterwards
cur_traffic = {'episode': self.cur_episode,
'time_sec': self.cur_sec,
'number_total_car': num_tot_car,
@@ -289,7 +300,7 @@ def _reset_state(self):
# prev action for yellow phase before each switch
if self.nodes[node].control:
self.nodes[node].prev_action = -1
# fingerprint is previous policy[:-1]
# fingerprint is previous policy[:-1]
self.nodes[node].num_fingerprint = self.nodes[node].phase_num - 1
num_state, speed_mask = self._get_cross_state_num(node)
self.nodes[node].state = np.zeros(num_state)
@@ -325,13 +336,33 @@ def _transfer_action(self, action):
action_ls.append(action)
return action_ls

def collect_tripinfo(self):
# read trip xml, has to be called externally to get complete file
trip_file = self.output_path + ('%s_%s_trip.xml' % (self.name, self.coop_level))
tree = ET.ElementTree(file=trip_file)
for child in tree.getroot():
cur_trip = child.attrib
cur_dict = {}
cur_dict['episode'] = self.cur_episode
cur_dict['id'] = cur_trip['id']
cur_dict['depart_sec'] = cur_trip['depart']
cur_dict['arrival_sec'] = cur_trip['arrival']
cur_dict['duration_sec'] = cur_trip['duration']
cur_dict['wait_step'] = cur_trip['waitSteps']
cur_dict['wait_sec'] = cur_trip['timeLoss']
self.trip_data.append(cur_dict)
# delete the current xml
cmd = 'rm ' + trip_file
subprocess.check_call(cmd, shell=True)

def init_data(self, is_record, record_stats, output_path):
self.is_record = is_record
self.record_stats = record_stats
self.output_path = output_path
if self.is_record:
self.traffic_data = []
self.control_data = []
self.trip_data = []
if self.record_stats:
self.car_num_stat = []
self.car_speed_stat = []
@@ -344,15 +375,18 @@ def output_data(self):
if not self.is_record:
logging.error('Env: no record to output!')
control_data = pd.DataFrame(self.control_data)
control_data.to_csv(self.output_path + 'control.csv')
control_data.to_csv(self.output_path + ('%s_%s_control.csv' % (self.name, self.coop_level)))
traffic_data = pd.DataFrame(self.traffic_data)
traffic_data.to_csv(self.output_path + 'traffic.csv')
traffic_data.to_csv(self.output_path + ('%s_%s_traffic.csv' % (self.name, self.coop_level)))
trip_data = pd.DataFrame(self.trip_data)
trip_data.to_csv(self.output_path + ('%s_%s_trip.csv' % (self.name, self.coop_level)))

def reset(self, test_ind=0):
self.terminate()
# have to terminate previous sim before calling reset
self._reset_state()
if not self.train_mode:
self.seed = self.test_seeds[test_ind]
# self._init_sim(gui=True)
self._init_sim()
# next environment random condition should be different
self.seed += 10
@@ -384,6 +418,7 @@ def step(self, action):
action_str = ','.join([str(int(a)) for a in action])
reward_str = ','.join([str(r) for r in reward])
cur_control = {'episode': self.cur_episode,
'time_sec': self.cur_sec,
'step': self.cur_sec / self.control_interval_sec,
'action': action_str,
'reward': reward_str}
@@ -392,7 +427,7 @@ def step(self, action):
if self.coop_level == 'global':
reward = global_reward
elif self.coop_level == 'local':
# global reward
# global reward
new_reward = [global_reward] * len(reward)
reward = np.array(new_reward)
elif self.coop_level == 'neighbor':
@@ -405,11 +440,12 @@ def step(self, action):
continue
if nnode in self.nodes[node].neighbor:
cur_reward += self.coop_gamma * reward[i]
else:
elif self.name == 'small_grid':
# in small grid, agent is at most 2 steps away
cur_reward += (self.coop_gamma ** 2) * reward[i]
# TODO: step decay in large grid
new_reward.append(cur_reward)
reward = np.array(new_reward)
# TODO: neighbor uses spatially discounted reward
return state, reward, done, global_reward

def update_fingerprint(self, policy):
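The `step()` changes above refine the `neighbor` cooperation level: each agent keeps its own reward, adds its neighbors' rewards discounted by `coop_gamma`, and, in the small grid only, adds non-neighbors' rewards discounted by `coop_gamma ** 2` (no agent there is more than two hops away; step decay for the large grid is still a TODO). A standalone sketch of that weighting, with assumed names and `rewards` ordered like `control_nodes`:
~~~
import numpy as np

def spatially_discounted_rewards(rewards, neighbor_map, node_names, coop_gamma):
    # Mirrors the neighbor-level reward in step() for the small grid:
    # own reward + coop_gamma * 1-hop rewards + coop_gamma**2 * 2-hop rewards.
    new_rewards = []
    for i, node in enumerate(node_names):
        cur = rewards[i]
        for j, other in enumerate(node_names):
            if other == node:
                continue
            if other in neighbor_map[node]:
                cur += coop_gamma * rewards[j]
            else:
                # small_grid assumption: every non-neighbor is exactly 2 hops away
                cur += (coop_gamma ** 2) * rewards[j]
        new_rewards.append(cur)
    return np.array(new_rewards)

# Illustrative topology (not the actual network file): nt1 borders nt2 and nt3.
neighbor_map = {'nt1': ['nt2', 'nt3'], 'nt2': ['nt1'], 'nt3': ['nt1']}
print(spatially_discounted_rewards([1.0, 0.5, 0.2], neighbor_map,
                                   ['nt1', 'nt2', 'nt3'], coop_gamma=0.9))
~~~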
42 changes: 33 additions & 9 deletions envs/small_grid_env.py
@@ -3,6 +3,7 @@
import matplotlib.pyplot as plt
import os
import seaborn as sns
import time
from envs.env import Phase, TrafficSimulator
from small_grid.data.build_file import gen_rou_file

@@ -17,6 +18,8 @@

STATE_MEAN_MASKS = {'in_car': False, 'in_speed': True, 'out_car': False}
STATE_NAMES = ['in_car', 'in_speed', 'out_car']
STATE_PHASE_MAP = {'nt1': [0, 1, 2], 'nt2': [1, 0], 'nt3': [1, 0],
'nt4': [1, 0], 'nt5': [1, 0], 'nt6': [1, 0]}


class SmallGridPhase(Phase):
@@ -36,8 +39,9 @@ def __init__(self):
self.phases = {2: two_phase, 3: three_phase}


class NaiveController:
class FixedController:
def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.name = 'naive'
self.phase_3 = 0
self.phase_2 = 0
self.num_3 = num_node_3phase
@@ -46,7 +50,7 @@ def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.step_3 = switch_step
self.step_2 = switch_step

def act(self, state=None):
def forward(self, ob=None, done=False, output_type=''):
if not self.step_3:
self.phase_3 = (self.phase_3 + 1) % 3
self.step_3 = self.switch_step - 1
@@ -59,8 +63,24 @@ def __init__(self, num_node_3phase=1, num_node_2phase=5, switch_step=2):
self.step_2 -= 1
return np.array([self.phase_3] * self.num_3 + [self.phase_2] * self.num_2)

def explore(self, state=None):
return self.act()

class SmallGridController:
def __init__(self, nodes):
self.name = 'naive'
self.nodes = nodes
self.phase = SmallGridPhase()

def forward(self, obs):
actions = []
for ob, node in zip(obs, self.nodes):
actions.append(self.greedy(ob, node))
return actions

def greedy(self, ob, node):
# hard code the mapping from state to number of cars
phases = STATE_PHASE_MAP[node]
in_cars = ob[:len(phases)]
return phases[np.argmax(in_cars)]


class SmallGridEnv(TrafficSimulator):
@@ -101,18 +121,22 @@ def plot_cdf(X, c='b', label=None):

if __name__ == '__main__':
config = configparser.ConfigParser()
config.read('./config/config.ini')
config.read('./config/config_local.ini')
base_dir = './output_result/'
if not os.path.exists(base_dir):
os.mkdir(base_dir)
env = SmallGridEnv(config['ENV_CONFIG'], 0, base_dir, is_record=False, record_stat=True)
env.reset()
controller = NaiveController()
env = SmallGridEnv(config['ENV_CONFIG'], 0, base_dir, is_record=True, record_stat=True)
ob = env.reset()
controller = SmallGridController()
rewards = []
while True:
_, reward, done = env.step(controller.act())
next_ob, reward, done, _ = env.step(controller.forward(ob))
rewards.append(np.mean(reward))
if done:
break
ob = next_ob
env.plot_stat(np.array(rewards))
env.terminate()
time.sleep(2)
env.collect_tripinfo()
env.output_data()
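As a worked illustration of `SmallGridController.greedy` above, assuming the first entries of each observation are the `in_car` counts for the approaches indexed by `STATE_PHASE_MAP` (that layout is inferred from `STATE_NAMES`, not spelled out in this diff):
~~~
import numpy as np

STATE_PHASE_MAP = {'nt1': [0, 1, 2], 'nt2': [1, 0], 'nt3': [1, 0],
                   'nt4': [1, 0], 'nt5': [1, 0], 'nt6': [1, 0]}

def greedy(ob, node):
    # pick the phase serving the incoming approach with the most cars
    phases = STATE_PHASE_MAP[node]
    in_cars = ob[:len(phases)]
    return phases[int(np.argmax(in_cars))]

# nt1 sees 3, 1 and 5 incoming cars; the third approach is busiest, so phase 2.
print(greedy(np.array([3.0, 1.0, 5.0, 0.2, 0.4]), 'nt1'))  # -> 2
~~~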