From 65086cfbf10a6c73631afc67026e9112c39e7133 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Mon, 30 Oct 2023 17:55:12 +0000 Subject: [PATCH 1/8] fix: have access to `terminal_observation` in the infos. --- supersuit/vector/markov_vector_wrapper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index 9ad042e..e161f95 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -91,9 +91,14 @@ def step(self, actions): infs = [infos.get(agent, {}) for agent in self.par_env.possible_agents] if env_done: - observations, infs = self.reset() + observations, reset_infs = self.reset() else: observations = self.concat_obs(observations) + # empty infos for reset infs + reset_infs = [{} for _ in range(self.par_env.possible_agents)] + # combine standard infos and reset infos + infs = infs + reset_infs + assert ( self.black_death or self.par_env.agents == self.par_env.possible_agents ), "MarkovVectorEnv does not support environments with varying numbers of active agents unless black_death is set to True" From ba3299d626cb3629bf0fd4bdb833909388d4c4b1 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Mon, 30 Oct 2023 18:08:19 +0000 Subject: [PATCH 2/8] fix: fix empty reset_infs. --- supersuit/vector/markov_vector_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index e161f95..ff35d81 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -95,7 +95,7 @@ def step(self, actions): else: observations = self.concat_obs(observations) # empty infos for reset infs - reset_infs = [{} for _ in range(self.par_env.possible_agents)] + reset_infs = [{} for _ in range(len(self.par_env.possible_agents))] # combine standard infos and reset infos infs = infs + reset_infs From 7096918d8a2e60b3a21b830c0c81e3d2a18d2f4a Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Mon, 6 Nov 2023 18:35:36 +0000 Subject: [PATCH 3/8] feat: Ensure infos list is of size n agents. --- supersuit/vector/markov_vector_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index ff35d81..07521c4 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -97,7 +97,7 @@ def step(self, actions): # empty infos for reset infs reset_infs = [{} for _ in range(len(self.par_env.possible_agents))] # combine standard infos and reset infos - infs = infs + reset_infs + infs = [{**inf, **reset_inf} for inf, reset_inf in zip(infs, reset_infs)] assert ( self.black_death or self.par_env.agents == self.par_env.possible_agents From 9788c6627d4b236de006871d41c9dc96f22d7911 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Mon, 6 Nov 2023 18:36:14 +0000 Subject: [PATCH 4/8] feat: test terminal_obs are returned when env reset. --- test/test_vector/test_pettingzoo_to_vec.py | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/test_vector/test_pettingzoo_to_vec.py b/test/test_vector/test_pettingzoo_to_vec.py index 9202a1b..e05d09c 100644 --- a/test/test_vector/test_pettingzoo_to_vec.py +++ b/test/test_vector/test_pettingzoo_to_vec.py @@ -1,5 +1,6 @@ import copy +import numpy as np import pytest from pettingzoo.butterfly import knights_archers_zombies_v10 from pettingzoo.mpe import simple_spread_v3, simple_world_comm_v3 @@ -89,3 +90,27 @@ def test_env_black_death_wrapper(): for i in range(300): actions = [env.action_space.sample() for i in range(env.num_envs)] obss, rews, terms, truncs, infos = env.step(actions) + + +def test_terminal_obs_are_returned(): + """ + If we reach (and pass) the end of the episode, the last observation is returned in the info dict. + """ + max_cycles = 300 + env = knights_archers_zombies_v10.parallel_env(spawn_rate=50, max_cycles=300) + env = pettingzoo_env_to_vec_env_v1(env) + env.reset(seed=42) + + # run past max_cycles or until terminated - causing the env to reset and continue + for _ in range(0, max_cycles + 10): + actions = [env.action_space.sample() for i in range(env.num_envs)] + _, _, terms, truncs, infos = env.step(actions) + + env_done = (np.array(terms) | np.array(truncs)).all() + + if env_done: + # check we have infos for all agents + assert len(infos) == len(env.par_env.possible_agents) + # check infos contain terminal_observation + for info in infos: + assert "terminal_observation" in info From 344ee29ffc069664f9e4779efd8d84db8ee29324 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Tue, 7 Nov 2023 12:24:12 +0000 Subject: [PATCH 5/8] fix: add black death wrapper to test. --- test/test_vector/test_pettingzoo_to_vec.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_vector/test_pettingzoo_to_vec.py b/test/test_vector/test_pettingzoo_to_vec.py index e05d09c..bd379e7 100644 --- a/test/test_vector/test_pettingzoo_to_vec.py +++ b/test/test_vector/test_pettingzoo_to_vec.py @@ -98,6 +98,7 @@ def test_terminal_obs_are_returned(): """ max_cycles = 300 env = knights_archers_zombies_v10.parallel_env(spawn_rate=50, max_cycles=300) + env = black_death_v3(env) env = pettingzoo_env_to_vec_env_v1(env) env.reset(seed=42) From 20af5226e1fecd779ed37279e30c2c879bad9f59 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Sun, 19 Nov 2023 17:10:47 +0000 Subject: [PATCH 6/8] fix: ensure parallel vec env don't reset using same seed. --- supersuit/vector/markov_vector_wrapper.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index 07521c4..9f67591 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -52,6 +52,13 @@ def step_wait(self): return self.step(self._saved_actions) def reset(self, seed=None, options=None): + if seed is None: + # To ensure that subprocesses have different seeds, + # we still populate the seed variable when no argument is passed. + # Otherwise parallel vec env workers could have identical seeds (env could default to seed if no seed is passed) + # when reset is called as part of line 101. + seed = int(np.random.randint(0, np.iinfo(np.uint32).max, dtype=np.uint32)) + # TODO: should this be changed to infos? _observations, infos = self.par_env.reset(seed=seed, options=options) observations = self.concat_obs(_observations) @@ -99,6 +106,12 @@ def step(self, actions): # combine standard infos and reset infos infs = [{**inf, **reset_inf} for inf, reset_inf in zip(infs, reset_infs)] + # index by agent ids + infs = {agent: inf for agent, inf in zip(self.par_env.possible_agents, infs)} + + print("infs", infs) + exit() + assert ( self.black_death or self.par_env.agents == self.par_env.possible_agents ), "MarkovVectorEnv does not support environments with varying numbers of active agents unless black_death is set to True" From bc36bdf8abebb3da01c0376c9ca2630009c296d3 Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Sun, 19 Nov 2023 17:21:02 +0000 Subject: [PATCH 7/8] fix: remove debugging code. --- supersuit/vector/markov_vector_wrapper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index 9f67591..7064946 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -106,12 +106,6 @@ def step(self, actions): # combine standard infos and reset infos infs = [{**inf, **reset_inf} for inf, reset_inf in zip(infs, reset_infs)] - # index by agent ids - infs = {agent: inf for agent, inf in zip(self.par_env.possible_agents, infs)} - - print("infs", infs) - exit() - assert ( self.black_death or self.par_env.agents == self.par_env.possible_agents ), "MarkovVectorEnv does not support environments with varying numbers of active agents unless black_death is set to True" From e49c5e19c44ec343c7a5f9a7b5841eec2eb1921f Mon Sep 17 00:00:00 2001 From: Kale-ab Tessera Date: Mon, 27 Nov 2023 16:23:07 +0000 Subject: [PATCH 8/8] fix: remove manual seeding if seed is none. --- supersuit/vector/markov_vector_wrapper.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/supersuit/vector/markov_vector_wrapper.py b/supersuit/vector/markov_vector_wrapper.py index 7064946..07521c4 100644 --- a/supersuit/vector/markov_vector_wrapper.py +++ b/supersuit/vector/markov_vector_wrapper.py @@ -52,13 +52,6 @@ def step_wait(self): return self.step(self._saved_actions) def reset(self, seed=None, options=None): - if seed is None: - # To ensure that subprocesses have different seeds, - # we still populate the seed variable when no argument is passed. - # Otherwise parallel vec env workers could have identical seeds (env could default to seed if no seed is passed) - # when reset is called as part of line 101. - seed = int(np.random.randint(0, np.iinfo(np.uint32).max, dtype=np.uint32)) - # TODO: should this be changed to infos? _observations, infos = self.par_env.reset(seed=seed, options=options) observations = self.concat_obs(_observations)