From d9aad936f86b76024c17f1efb44fd055eda8cefe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roxana=20R=C4=83dulescu?= <8026679+rradules@users.noreply.github.com>
Date: Tue, 26 Mar 2024 13:42:23 +0100
Subject: [PATCH] PCN on IG

---
 .gitignore                                |  2 +
 momaland/learning/morl/ig_env_factory.py  | 27 +++++++++++++
 momaland/learning/morl/train_ig_GPILS.py  | 44 +++++----------------
 momaland/learning/morl/train_ig_PCN.py    | 49 ++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 34 deletions(-)
 create mode 100644 momaland/learning/morl/ig_env_factory.py
 create mode 100644 momaland/learning/morl/train_ig_PCN.py

diff --git a/.gitignore b/.gitignore
index e78822c4..1c8d468f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,9 @@ __pycache__/
 # Cluster scripts
 /hpc
 momaland/learning/wandb/
+momaland/learning/morl/wandb/
 momaland/learning/weights/
+momaland/learning/morl/weights/
 
 # Distribution / packaging
 .Python
diff --git a/momaland/learning/morl/ig_env_factory.py b/momaland/learning/morl/ig_env_factory.py
new file mode 100644
index 00000000..bfb1c211
--- /dev/null
+++ b/momaland/learning/morl/ig_env_factory.py
@@ -0,0 +1,27 @@
+"""Item Gathering environment factory."""
+
+from momaland.envs.item_gathering import item_gathering
+from momaland.envs.item_gathering.map_utils import DEFAULT_MAP, generate_map
+from momaland.utils.parallel_wrappers import CentraliseAgent
+
+
+def get_map_4_O():
+    """Generate a map with 4 objectives."""
+    return generate_map(rows=8, columns=8, item_distribution=(3, 4, 2, 1), num_agents=2, seed=1)
+
+
+def get_map_2_O():
+    """Generate a map with 2 objectives."""
+    return generate_map(rows=8, columns=8, item_distribution=(4, 6), num_agents=2, seed=1)
+
+
+def make_single_agent_ig_env(objectives=3):
+    """Create a centralised agent environment for the Item Gathering domain."""
+    if objectives == 2:
+        env_map = get_map_2_O()
+    elif objectives == 4:
+        env_map = get_map_4_O()
+    else:
+        env_map = DEFAULT_MAP
+    ig_env = item_gathering.parallel_env(initial_map=env_map, num_timesteps=50, randomise=False, render_mode=None)
+    return CentraliseAgent(ig_env, action_mapping=True)
diff --git a/momaland/learning/morl/train_ig_GPILS.py b/momaland/learning/morl/train_ig_GPILS.py
index c0cdf4cc..5ae6dfd6 100644
--- a/momaland/learning/morl/train_ig_GPILS.py
+++ b/momaland/learning/morl/train_ig_GPILS.py
@@ -5,37 +5,13 @@
 import numpy as np
 from morl_baselines.multi_policy.gpi_pd.gpi_pd import GPILS
 
-from momaland.envs.item_gathering import item_gathering
-from momaland.envs.item_gathering.map_utils import DEFAULT_MAP, generate_map
-from momaland.utils.parallel_wrappers import CentraliseAgent
-
-
-def get_map_4_O():
-    """Generate a map with 4 objectives."""
-    return generate_map(rows=8, columns=8, item_distribution=(3, 4, 2, 1), num_agents=2, seed=1)
-
-
-def get_map_2_O():
-    """Generate a map with 2 objectives."""
-    return generate_map(rows=8, columns=8, item_distribution=(4, 6), num_agents=2, seed=1)
-
-
-def make_single_agent_ig_env(objectives=3):
-    """Create a centralised agent environment for the Item Gathering domain."""
-    if objectives == 2:
-        map = get_map_2_O()
-    elif objectives == 4:
-        map = get_map_4_O()
-    else:
-        map = DEFAULT_MAP
-    ig_env = item_gathering.parallel_env(initial_map=map, num_timesteps=50, randomise=False, render_mode=None)
-    return CentraliseAgent(ig_env, action_mapping=True)
+from momaland.learning.morl.ig_env_factory import make_single_agent_ig_env
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("-seed", type=int, default=42, help="Seed for the agent.")
help="Seed for the agent.") - parser.add_argument("-objectives", type=int, default=3, help="Number of objectives/item types for the IG problem.") + parser.add_argument("-objectives", type=int, default=2, help="Number of objectives/item types for the IG problem.") args = parser.parse_args() seed = args.seed obj = args.objectives @@ -51,12 +27,12 @@ def make_single_agent_ig_env(objectives=3): max_grad_norm=None, learning_rate=3e-4, gamma=0.99, - batch_size=128, - net_arch=[256, 256], - buffer_size=int(2e5), - initial_epsilon=1.0, + batch_size=256, + net_arch=[64, 64], + buffer_size=1000, + initial_epsilon=0.5, final_epsilon=0.05, - epsilon_decay_steps=75000, + epsilon_decay_steps=7500, learning_starts=100, alpha_per=0.6, min_priority=0.01, @@ -66,15 +42,15 @@ def make_single_agent_ig_env(objectives=3): target_net_update_freq=200, tau=1, log=True, - project_name="MOMAland-Baselines", + project_name="MOMAland-Evaluation", seed=seed, ) - timesteps_per_iter = 10000 + timesteps_per_iter = 1000 algo = "gpi-ls" agent.train( - total_timesteps=15 * timesteps_per_iter, + total_timesteps=100 * timesteps_per_iter, eval_env=eval_env, ref_point=ref_point, weight_selection_algo=algo, diff --git a/momaland/learning/morl/train_ig_PCN.py b/momaland/learning/morl/train_ig_PCN.py new file mode 100644 index 00000000..b46cc2d9 --- /dev/null +++ b/momaland/learning/morl/train_ig_PCN.py @@ -0,0 +1,49 @@ +"""MO Gymnasium on centralised agents versions of MOMAland.""" + +import argparse + +import numpy as np +from morl_baselines.multi_policy.pcn.pcn import PCN + +from momaland.learning.morl.ig_env_factory import make_single_agent_ig_env + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-seed", type=int, default=44, help="Seed for the agent.") + parser.add_argument("-objectives", type=int, default=2, help="Number of objectives/item types for the IG problem.") + args = parser.parse_args() + seed = args.seed + obj = args.objectives + + env = make_single_agent_ig_env(objectives=obj) + eval_env = make_single_agent_ig_env(objectives=obj) + + ref_point = np.zeros(obj) + if obj == 2: + max_return = np.array([4.0, 6.0]) + elif obj == 4: + max_return = np.array([3.0, 4.0, 2.0, 1.0]) + print("Reference point: ", ref_point) + + agent = PCN( + env, + seed=seed, + gamma=0.99, + scaling_factor=np.ones(obj + 1), + learning_rate=1e-3, + batch_size=256, + project_name="MOMAland-Evaluation", + experiment_name="PCN", + log=True, + ) + timesteps_per_iter = 10000 + agent.train( + eval_env=eval_env, + total_timesteps=10 * timesteps_per_iter, + ref_point=ref_point, + num_er_episodes=20, + max_buffer_size=50, + num_model_updates=50, + max_return=max_return, + )