From ab5dae3b2ef16b39546c28e79cbcd67857cce482 Mon Sep 17 00:00:00 2001 From: dzako Date: Mon, 27 Jun 2022 11:49:07 +0200 Subject: [PATCH] keyboard agent for measuring human score --- gym_space/__init__.py | 4 --- keyboard_agent.py | 60 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/gym_space/__init__.py b/gym_space/__init__.py index 22e9e68..16739a9 100644 --- a/gym_space/__init__.py +++ b/gym_space/__init__.py @@ -83,7 +83,6 @@ kwargs={ "ship_steering": 1, "ship_moi": 0.01, - "reward_value": 0, "rad_penalty_C": 2, "numerator_C": 0.01, "act_penalty_C": 0.5, @@ -102,7 +101,6 @@ kwargs={ "ship_steering": 1, "ship_moi": 0.01, - "reward_value": 0, "rad_penalty_C": 2, "numerator_C": 0.01, "act_penalty_C": 0.5, @@ -121,7 +119,6 @@ kwargs={ "ship_steering": 1, "ship_moi": 0.01, - "reward_value": 0, "rad_penalty_C": 2, "numerator_C": 0.01, "act_penalty_C": 0.5, @@ -140,7 +137,6 @@ kwargs={ "ship_steering": 1, "ship_moi": 0.01, - "reward_value": 0, "rad_penalty_C": 2, "numerator_C": 0.01, "act_penalty_C": 0.5, diff --git a/keyboard_agent.py b/keyboard_agent.py index fd74ffa..8dcc6fe 100644 --- a/keyboard_agent.py +++ b/keyboard_agent.py @@ -4,6 +4,8 @@ import numpy as np if __name__ == "__main__": + total_rewards = [] + episodes = 5 gym.envs.register( id=f"KeplerDiscrete-v0", @@ -25,18 +27,49 @@ ) #env = gym.make("KeplerDiscrete-v0") + gym.envs.register( + id="GoalDiscrete2-v0", + entry_point="gym_space.envs.goal:GoalDiscreteEnv", + max_episode_steps = 500, + kwargs={ + "n_planets": 2, + "ship_steering": 1, + "ship_moi": 0.01, + "survival_reward_scale": 0.2, + "goal_vel_reward_scale": 5.0, + "safety_reward_scale": 10.0, + "goal_sparse_reward": 5.0, + "max_engine_force": 1, + }, + ) gym.envs.register( id="GoalDiscrete3-v0", entry_point="gym_space.envs.goal:GoalDiscreteEnv", + max_episode_steps = 500, kwargs={ "n_planets": 3, "ship_steering": 1, "ship_moi": 0.01, - "survival_reward_scale": 0.0, - "goal_vel_reward_scale": 1.0, - "safety_reward_scale": 2.0, - "goal_sparse_reward": 10.0, - "max_engine_force": 0.4, + "survival_reward_scale": 0.2, + "goal_vel_reward_scale": 5.0, + "safety_reward_scale": 10.0, + "goal_sparse_reward": 5.0, + "max_engine_force": 1, + }, + ) + gym.envs.register( + id="GoalDiscrete4-v0", + entry_point="gym_space.envs.goal:GoalDiscreteEnv", + max_episode_steps = 500, + kwargs={ + "n_planets": 4, + "ship_steering": 1, + "ship_moi": 0.01, + "survival_reward_scale": 0.2, + "goal_vel_reward_scale": 5.0, + "safety_reward_scale": 10.0, + "goal_sparse_reward": 5.0, + "max_engine_force": 1, }, ) env = gym.make(f"GoalDiscrete3-v0") @@ -86,6 +119,7 @@ def key_release(key, mod): env.unwrapped.viewer.window.on_key_press = key_press env.unwrapped.viewer.window.on_key_release = key_release + def rollout(env): global human_agent_action, human_wants_restart, human_sets_pause human_wants_restart = False @@ -95,6 +129,7 @@ def rollout(env): total_reward = 0 total_timesteps = 0 k = 0 + while 1: if not skip: # print("taking action {}".format(human_agent_action)) @@ -120,19 +155,20 @@ def rollout(env): break while human_sets_pause: env.render() - time.sleep(0.1) + time.sleep(0.01) k += 1 time.sleep(0.1) print("END OF GAME! YOUR FINAL SCORE:") + total_rewards.append(total_reward) print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) print(obser_max) - time.sleep(5) + time.sleep(2) - print("ACTIONS={}".format(ACTIONS)) - print("Press left/right arrows (rotation) and space (engine)") - print("No keys pressed is taking action 0") - - while 1: + for e in range(episodes): window_still_open = rollout(env) if window_still_open == False: break + + print("HUMAN BASELINE SCORE:\n") + print(np.mean(total_rewards)) + print(np.std(total_rewards))