From 1d441f4b24932bf43a2afa963cc8c279222cd347 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 22 Nov 2023 18:08:31 +0100 Subject: [PATCH 1/6] BaseAECWrapper is now using BaseWrapper from PZ instead of OrderEnforcing --- supersuit/generic_wrappers/utils/shared_wrapper_util.py | 4 ++-- supersuit/utils/base_aec_wrapper.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/supersuit/generic_wrappers/utils/shared_wrapper_util.py b/supersuit/generic_wrappers/utils/shared_wrapper_util.py index de2bb03..e7aef4c 100644 --- a/supersuit/generic_wrappers/utils/shared_wrapper_util.py +++ b/supersuit/generic_wrappers/utils/shared_wrapper_util.py @@ -2,12 +2,12 @@ import gymnasium from pettingzoo.utils import BaseParallelWrapper -from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap +from pettingzoo.utils.wrappers import BaseWrapper from supersuit.utils.wrapper_chooser import WrapperChooser -class shared_wrapper_aec(PettingzooWrap): +class shared_wrapper_aec(BaseWrapper): def __init__(self, env, modifier_class): super().__init__(env) diff --git a/supersuit/utils/base_aec_wrapper.py b/supersuit/utils/base_aec_wrapper.py index 8d8bc0b..24beb71 100644 --- a/supersuit/utils/base_aec_wrapper.py +++ b/supersuit/utils/base_aec_wrapper.py @@ -1,7 +1,7 @@ -from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap +from pettingzoo.utils.wrappers import BaseWrapper -class BaseWrapper(PettingzooWrap): +class BaseWrapper(BaseWrapper): def __init__(self, env): """ Creates a wrapper around `env`. Extend this class to create changes to the space. From 831475b597d80ffa0d54283bb6cf146f140706d0 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Fri, 24 Nov 2023 17:25:09 +0100 Subject: [PATCH 2/6] Fix np_random --- supersuit/generic_wrappers/frame_skip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/supersuit/generic_wrappers/frame_skip.py b/supersuit/generic_wrappers/frame_skip.py index 15b80f8..8bc7c79 100644 --- a/supersuit/generic_wrappers/frame_skip.py +++ b/supersuit/generic_wrappers/frame_skip.py @@ -10,6 +10,7 @@ class frame_skip_gym(gymnasium.Wrapper): def __init__(self, env, num_frames): super().__init__(env) + self.np_random = env.unwrapped.np_random self.num_frames = check_transform_frameskip(num_frames) def step(self, action): From 69073d00bb9cb7324522ad49a9a6572bb2f3d3b0 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Fri, 24 Nov 2023 17:27:33 +0100 Subject: [PATCH 3/6] Revert changes --- supersuit/generic_wrappers/utils/shared_wrapper_util.py | 2 +- supersuit/utils/base_aec_wrapper.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/supersuit/generic_wrappers/utils/shared_wrapper_util.py b/supersuit/generic_wrappers/utils/shared_wrapper_util.py index e7aef4c..210dca7 100644 --- a/supersuit/generic_wrappers/utils/shared_wrapper_util.py +++ b/supersuit/generic_wrappers/utils/shared_wrapper_util.py @@ -2,7 +2,7 @@ import gymnasium from pettingzoo.utils import BaseParallelWrapper -from pettingzoo.utils.wrappers import BaseWrapper +from pettingzoo.utils.wrappers import OrderEnforcingWrapper as BaseWrapper from supersuit.utils.wrapper_chooser import WrapperChooser diff --git a/supersuit/utils/base_aec_wrapper.py b/supersuit/utils/base_aec_wrapper.py index 24beb71..f9773d7 100644 --- a/supersuit/utils/base_aec_wrapper.py +++ b/supersuit/utils/base_aec_wrapper.py @@ -1,7 +1,7 @@ -from pettingzoo.utils.wrappers import BaseWrapper +from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PZBaseWrapper -class BaseWrapper(BaseWrapper): +class BaseWrapper(PZBaseWrapper): def __init__(self, env): """ Creates a wrapper around `env`. Extend this class to create changes to the space. From b805392129d42861044e00b9501b0c000d9d8f67 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Tue, 28 Nov 2023 08:55:57 +0100 Subject: [PATCH 4/6] fix imports --- supersuit/lambda_wrappers/reward_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supersuit/lambda_wrappers/reward_lambda.py b/supersuit/lambda_wrappers/reward_lambda.py index 6f9e424..d73cc55 100644 --- a/supersuit/lambda_wrappers/reward_lambda.py +++ b/supersuit/lambda_wrappers/reward_lambda.py @@ -1,6 +1,6 @@ import gymnasium +from pettingzoo.utils import BaseWrapper as PettingzooWrap -from supersuit.utils.base_aec_wrapper import PettingzooWrap from supersuit.utils.make_defaultdict import make_defaultdict from supersuit.utils.wrapper_chooser import WrapperChooser From ffc0b0b9bd9bf354452821ae6350803a1dace0ca Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Tue, 28 Nov 2023 10:15:49 +0100 Subject: [PATCH 5/6] Fix frame skip 2 --- supersuit/generic_wrappers/frame_skip.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/supersuit/generic_wrappers/frame_skip.py b/supersuit/generic_wrappers/frame_skip.py index 8bc7c79..ca3860e 100644 --- a/supersuit/generic_wrappers/frame_skip.py +++ b/supersuit/generic_wrappers/frame_skip.py @@ -147,8 +147,7 @@ def __init__(self, env, num_frames, default_action=None): def step(self, action): action = {**action} low, high = self.num_frames - num_skips = int(self.np_random.integers(low, high + 1)) - self.agents = self.env.agents[:] + num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1)) orig_agents = set(action.keys()) total_reward = make_defaultdict({agent: 0.0 for agent in self.agents}) @@ -191,7 +190,6 @@ def step(self, action): del total_infos[agent] del total_obs[agent] - self.agents = self.env.agents[:] return ( total_obs, total_reward, From 1db594df92dc781d4685f5953aa626f82f5cd31f Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Tue, 28 Nov 2023 10:24:30 +0100 Subject: [PATCH 6/6] Fix a few more things --- supersuit/generic_wrappers/frame_skip.py | 3 +-- supersuit/lambda_wrappers/reward_lambda.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/supersuit/generic_wrappers/frame_skip.py b/supersuit/generic_wrappers/frame_skip.py index ca3860e..65725ee 100644 --- a/supersuit/generic_wrappers/frame_skip.py +++ b/supersuit/generic_wrappers/frame_skip.py @@ -10,12 +10,11 @@ class frame_skip_gym(gymnasium.Wrapper): def __init__(self, env, num_frames): super().__init__(env) - self.np_random = env.unwrapped.np_random self.num_frames = check_transform_frameskip(num_frames) def step(self, action): low, high = self.num_frames - num_skips = int(self.np_random.integers(low, high + 1)) + num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1)) total_reward = 0.0 for x in range(num_skips): diff --git a/supersuit/lambda_wrappers/reward_lambda.py b/supersuit/lambda_wrappers/reward_lambda.py index d73cc55..4547354 100644 --- a/supersuit/lambda_wrappers/reward_lambda.py +++ b/supersuit/lambda_wrappers/reward_lambda.py @@ -24,7 +24,7 @@ def reset(self, seed=None, options=None): super().reset(seed=seed, options=options) self.rewards = { agent: self._change_reward_fn(reward) - for agent, reward in self.rewards.items() + for agent, reward in self.env.rewards.items() # you don't want to unwrap here, because another reward wrapper might have been applied } self.__cumulative_rewards = make_defaultdict({a: 0 for a in self.agents}) self._accumulate_rewards() @@ -34,7 +34,7 @@ def step(self, action): super().step(action) self.rewards = { agent: self._change_reward_fn(reward) - for agent, reward in self.rewards.items() + for agent, reward in self.env.rewards.items() # you don't want to unwrap here, because another reward wrapper might have been applied } self.__cumulative_rewards[agent] = 0 self._cumulative_rewards = self.__cumulative_rewards