-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
171 lines (149 loc) · 5.7 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import marlo
import json
import numpy as np
import tensorflow as tf
import baselines.common.tf_util as U
from models.lstm_burn_in.evaluation.build_eval_action import build_eval_act, act_reshape
from models.lstm_burn_in.utils import reformat_obs, env_detector
def get_join_tokens():
if marlo.is_grading():
"""
In the crowdAI Evaluation environment obtain the join_tokens
from the evaluator
the `params` parameter passed to the `evaluator_join_token` only allows
the following keys :
"seed",
"tick_length",
"max_retries",
"retry_sleep",
"step_sleep",
"skip_steps",
"videoResolution",
"continuous_to_discrete",
"allowContinuousMovement",
"allowDiscreteMovement",
"allowAbsoluteMovement",
"add_noop_command",
"comp_all_commands"
# TODO: Add this to the official documentation ?
# Help Wanted :D Pull Requests welcome :D
"""
join_tokens = marlo.evaluator_join_token(params={})
else:
"""
When debugging locally,
Please ensure that you have a Minecraft client running on port 10000 and 10001
by doing :
$MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
$MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
"""
ip = '127.0.0.1'
# ip = '192.168.2.78'
print("Generating join tokens locally...")
client_pool = [(ip, 11000), (ip, 11001)]
# env = 'MarLo-MobchaseTrain2-v0'
# env = "MarLo-BuildbattleTrainX-v0"
# env= "MarLo-TreasurehuntTrainX-v0"
join_tokens = marlo.make('MarLo-TreasurehuntTrain1-v0',
params={
"client_pool": client_pool,
"agent_names" : [
"MarLo-Agent-0",
"MarLo-Agent-1"
]
})
return join_tokens
@marlo.threaded
def run_agent(env, agent_id, sess, act):
"""
Where agent_id is an integral number starting from 0
In case, you have requested GPUs, then the agent_id will match
the GPU device id assigneed to this agent.
"""
def zero_state(lstm_units=512):
state = np.zeros((2, 1, lstm_units))
return state
obs = env.reset()
obs = reformat_obs(obs)
done = False
# count = 0
lstm_state = zero_state()
with sess.as_default():
while not done:
a, lstm_state = act(obs[None], lstm_state)
obs, reward, done, info = env.step(a[0])
obs = reformat_obs(obs)
# count += 1
# print("agent_id:{}, action:{}, reward:{}, done:{}, count:{}".format(agent_id, a, reward, done, count))
# It is important to do this env.close()
env.close()
def run_episode():
"""
Single episode run
"""
join_tokens = get_join_tokens()
# When the required number of episodes are evaluated
# The evaluator returns False for join_tokens
if not join_tokens:
return
config = tf.ConfigProto(device_count={'GPU': 0})
sess = tf.Session(config=config)
"""
NOTE: If instead of a dynamic loop, you hard code the run_agent
function calls, then the evaluation of your code will fail in case
of a tournament, where multiple submissions can control different agents
in the same game.
"""
# set environments and policies ------------------------------------
thread_handlers = []
envs = []
act_fs = []
agent_ids = []
params_s = []
set_params_fs = []
# set environments and networks
param_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models/lstm_burn_in/evaluation/parameters')
print(param_dir)
for _idx, token in enumerate(join_tokens):
env = marlo.init(token)
env_name = env_detector(env) # Buildbattle, Treasurehunt or Mobchase
envs.append(env)
agent_ids.append(_idx)
num_action = 6 if env_name == "Mobchase" else 8
act_f, set_params_f = build_eval_act(num_actions=num_action, scope=env_name+str(_idx))
act_f = act_reshape(act_f, env_name)
act_fs.append(act_f)
set_params_fs.append(set_params_f)
param_file = os.path.join(param_dir, env_name+'.npy')
params_s.append(np.load(param_file))
# set trained parameters
with sess.as_default():
U.initialize()
for i in range(len(set_params_fs)):
# print("load params {}".format(i))
set_params_fs[i](*params_s[i])
# run agents
for env, _idx, act in zip(envs, agent_ids, act_fs):
# Run agent-N on a separate thread
thread_handler = run_agent(env, _idx, sess, act)
# Accumulate thread handlers
thread_handlers.append(thread_handler)
# Wait for threads to complete or raise an exception
marlo.utils.join_all(thread_handlers)
print("Episode Run Complete")
if __name__ == "__main__":
"""
In case of debugging locally, run the episode just once
and in case of when the agent is being evaluated, continue
running episodes for as long as the evaluator keeps supplying
join_tokens.
"""
if not marlo.is_grading():
print("Running single episode...")
run_episode()
else:
while True:
run_episode()