Skip to content

Commit

Permalink
Merge pull request #26 from MIMUW-RL/jaco
Browse files Browse the repository at this point in the history
Add a keyboard agent for measuring the human baseline score
  • Loading branch information
dzako authored Jun 27, 2022
2 parents 6e02281 + ab5dae3 commit 8722c54
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
4 changes: 0 additions & 4 deletions gym_space/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
kwargs={
"ship_steering": 1,
"ship_moi": 0.01,
"reward_value": 0,
"rad_penalty_C": 2,
"numerator_C": 0.01,
"act_penalty_C": 0.5,
Expand All @@ -102,7 +101,6 @@
kwargs={
"ship_steering": 1,
"ship_moi": 0.01,
"reward_value": 0,
"rad_penalty_C": 2,
"numerator_C": 0.01,
"act_penalty_C": 0.5,
Expand All @@ -121,7 +119,6 @@
kwargs={
"ship_steering": 1,
"ship_moi": 0.01,
"reward_value": 0,
"rad_penalty_C": 2,
"numerator_C": 0.01,
"act_penalty_C": 0.5,
Expand All @@ -140,7 +137,6 @@
kwargs={
"ship_steering": 1,
"ship_moi": 0.01,
"reward_value": 0,
"rad_penalty_C": 2,
"numerator_C": 0.01,
"act_penalty_C": 0.5,
Expand Down
60 changes: 48 additions & 12 deletions keyboard_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np

if __name__ == "__main__":
total_rewards = []
episodes = 5

gym.envs.register(
id=f"KeplerDiscrete-v0",
Expand All @@ -25,18 +27,49 @@
)
#env = gym.make("KeplerDiscrete-v0")

# Register the 2-planet discrete goal-reaching task.  The kwargs dict is
# forwarded verbatim to GoalDiscreteEnv's constructor by gym's registry.
goal2_kwargs = {
    "n_planets": 2,
    "ship_steering": 1,
    "ship_moi": 0.01,
    "survival_reward_scale": 0.2,
    "goal_vel_reward_scale": 5.0,
    "safety_reward_scale": 10.0,
    "goal_sparse_reward": 5.0,
    "max_engine_force": 1,
}
gym.envs.register(
    id="GoalDiscrete2-v0",
    entry_point="gym_space.envs.goal:GoalDiscreteEnv",
    max_episode_steps=500,
    kwargs=goal2_kwargs,
)
# Register the 3-planet discrete goal-reaching task.
# NOTE(review): the original dict literal listed survival_reward_scale,
# goal_vel_reward_scale, safety_reward_scale, goal_sparse_reward and
# max_engine_force twice.  In a Python dict display the later entry
# silently wins, so the earlier stale values (0.0, 1.0, 2.0, 10.0, 0.4)
# were dead code.  They are removed here, keeping the winning values,
# which also match the GoalDiscrete2/GoalDiscrete4 registrations.
gym.envs.register(
    id="GoalDiscrete3-v0",
    entry_point="gym_space.envs.goal:GoalDiscreteEnv",
    max_episode_steps=500,
    kwargs={
        "n_planets": 3,
        "ship_steering": 1,
        "ship_moi": 0.01,
        "survival_reward_scale": 0.2,
        "goal_vel_reward_scale": 5.0,
        "safety_reward_scale": 10.0,
        "goal_sparse_reward": 5.0,
        "max_engine_force": 1,
    },
)
# Register the 4-planet variant of the discrete goal task; identical
# reward shaping to GoalDiscrete2, only the planet count differs.
goal4_kwargs = {
    "n_planets": 4,
    "ship_steering": 1,
    "ship_moi": 0.01,
    "survival_reward_scale": 0.2,
    "goal_vel_reward_scale": 5.0,
    "safety_reward_scale": 10.0,
    "goal_sparse_reward": 5.0,
    "max_engine_force": 1,
}
gym.envs.register(
    id="GoalDiscrete4-v0",
    entry_point="gym_space.envs.goal:GoalDiscreteEnv",
    max_episode_steps=500,
    kwargs=goal4_kwargs,
)
env = gym.make(f"GoalDiscrete3-v0")
Expand Down Expand Up @@ -86,6 +119,7 @@ def key_release(key, mod):
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release


def rollout(env):
global human_agent_action, human_wants_restart, human_sets_pause
human_wants_restart = False
Expand All @@ -95,6 +129,7 @@ def rollout(env):
total_reward = 0
total_timesteps = 0
k = 0

while 1:
if not skip:
# print("taking action {}".format(human_agent_action))
Expand All @@ -120,19 +155,20 @@ def rollout(env):
break
while human_sets_pause:
env.render()
time.sleep(0.1)
time.sleep(0.01)
k += 1
time.sleep(0.1)
print("END OF GAME! YOUR FINAL SCORE:")
total_rewards.append(total_reward)
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
print(obser_max)
time.sleep(5)
time.sleep(2)

print("ACTIONS={}".format(ACTIONS))
print("Press left/right arrows (rotation) and space (engine)")
print("No keys pressed is taking action 0")

# Play a fixed number of episodes so a mean/std human-baseline score can
# be computed afterwards.  The stray `while 1:` line above this loop was
# a leftover removed diff line (the old unbounded loop this bounded
# for-loop replaced) and is dropped.
for e in range(episodes):
    window_still_open = rollout(env)
    # rollout() returns the viewer-window status; stop early if the user
    # closed the window (original `== False` comparison kept intact, as
    # rollout's return type is not visible from here).
    if window_still_open == False:
        break

print("HUMAN BASELINE SCORE:\n")
print(np.mean(total_rewards))
print(np.std(total_rewards))

0 comments on commit 8722c54

Please sign in to comment.