diff --git a/supersuit/generic_wrappers/frame_skip.py b/supersuit/generic_wrappers/frame_skip.py index 15b80f8..65725ee 100644 --- a/supersuit/generic_wrappers/frame_skip.py +++ b/supersuit/generic_wrappers/frame_skip.py @@ -14,7 +14,7 @@ def __init__(self, env, num_frames): def step(self, action): low, high = self.num_frames - num_skips = int(self.np_random.integers(low, high + 1)) + num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1)) total_reward = 0.0 for x in range(num_skips): @@ -146,8 +146,7 @@ def __init__(self, env, num_frames, default_action=None): def step(self, action): action = {**action} low, high = self.num_frames - num_skips = int(self.np_random.integers(low, high + 1)) - self.agents = self.env.agents[:] + num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1)) orig_agents = set(action.keys()) total_reward = make_defaultdict({agent: 0.0 for agent in self.agents}) @@ -190,7 +189,6 @@ def step(self, action): del total_infos[agent] del total_obs[agent] - self.agents = self.env.agents[:] return ( total_obs, total_reward, diff --git a/supersuit/generic_wrappers/utils/shared_wrapper_util.py b/supersuit/generic_wrappers/utils/shared_wrapper_util.py index de2bb03..210dca7 100644 --- a/supersuit/generic_wrappers/utils/shared_wrapper_util.py +++ b/supersuit/generic_wrappers/utils/shared_wrapper_util.py @@ -2,12 +2,12 @@ import gymnasium from pettingzoo.utils import BaseParallelWrapper -from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap +from pettingzoo.utils.wrappers import OrderEnforcingWrapper as BaseWrapper from supersuit.utils.wrapper_chooser import WrapperChooser -class shared_wrapper_aec(PettingzooWrap): +class shared_wrapper_aec(BaseWrapper): def __init__(self, env, modifier_class): super().__init__(env) diff --git a/supersuit/lambda_wrappers/reward_lambda.py b/supersuit/lambda_wrappers/reward_lambda.py index 6f9e424..4547354 100644 --- a/supersuit/lambda_wrappers/reward_lambda.py +++ b/supersuit/lambda_wrappers/reward_lambda.py @@ -1,6 +1,6 @@ import gymnasium +from pettingzoo.utils import BaseWrapper as PettingzooWrap -from supersuit.utils.base_aec_wrapper import PettingzooWrap from supersuit.utils.make_defaultdict import make_defaultdict from supersuit.utils.wrapper_chooser import WrapperChooser @@ -24,7 +24,7 @@ def reset(self, seed=None, options=None): super().reset(seed=seed, options=options) self.rewards = { agent: self._change_reward_fn(reward) - for agent, reward in self.rewards.items() + for agent, reward in self.env.rewards.items() # you don't want to unwrap here, because another reward wrapper might have been applied } self.__cumulative_rewards = make_defaultdict({a: 0 for a in self.agents}) self._accumulate_rewards() @@ -34,7 +34,7 @@ def step(self, action): super().step(action) self.rewards = { agent: self._change_reward_fn(reward) - for agent, reward in self.rewards.items() + for agent, reward in self.env.rewards.items() # you don't want to unwrap here, because another reward wrapper might have been applied } self.__cumulative_rewards[agent] = 0 self._cumulative_rewards = self.__cumulative_rewards diff --git a/supersuit/utils/base_aec_wrapper.py b/supersuit/utils/base_aec_wrapper.py index 8d8bc0b..f9773d7 100644 --- a/supersuit/utils/base_aec_wrapper.py +++ b/supersuit/utils/base_aec_wrapper.py @@ -1,7 +1,7 @@ -from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap +from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PZBaseWrapper -class BaseWrapper(PettingzooWrap): +class BaseWrapper(PZBaseWrapper): def __init__(self, env): """ Creates a wrapper around `env`. Extend this class to create changes to the space.