Hotfix/fix frame skip random (#235)

Farama-Foundation · Nov 28, 2023 · 67e47b4
2 parents dc4dc8a + 1db594d
commit 67e47b4
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 11 deletions.
diff --git a/supersuit/generic_wrappers/frame_skip.py b/supersuit/generic_wrappers/frame_skip.py
@@ -14,7 +14,7 @@ def __init__(self, env, num_frames):
 
     def step(self, action):
         low, high = self.num_frames
-        num_skips = int(self.np_random.integers(low, high + 1))
+        num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1))
         total_reward = 0.0
 
         for x in range(num_skips):
@@ -146,8 +146,7 @@ def __init__(self, env, num_frames, default_action=None):
     def step(self, action):
         action = {**action}
         low, high = self.num_frames
-        num_skips = int(self.np_random.integers(low, high + 1))
-        self.agents = self.env.agents[:]
+        num_skips = int(self.env.unwrapped.np_random.integers(low, high + 1))
         orig_agents = set(action.keys())
 
         total_reward = make_defaultdict({agent: 0.0 for agent in self.agents})
@@ -190,7 +189,6 @@ def step(self, action):
                 del total_infos[agent]
                 del total_obs[agent]
 
-        self.agents = self.env.agents[:]
         return (
             total_obs,
             total_reward,

diff --git a/supersuit/generic_wrappers/utils/shared_wrapper_util.py b/supersuit/generic_wrappers/utils/shared_wrapper_util.py
@@ -2,12 +2,12 @@
 
 import gymnasium
 from pettingzoo.utils import BaseParallelWrapper
-from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap
+from pettingzoo.utils.wrappers import OrderEnforcingWrapper as BaseWrapper
 
 from supersuit.utils.wrapper_chooser import WrapperChooser
 
 
-class shared_wrapper_aec(PettingzooWrap):
+class shared_wrapper_aec(BaseWrapper):
     def __init__(self, env, modifier_class):
         super().__init__(env)
 

diff --git a/supersuit/lambda_wrappers/reward_lambda.py b/supersuit/lambda_wrappers/reward_lambda.py
@@ -1,6 +1,6 @@
 import gymnasium
+from pettingzoo.utils import BaseWrapper as PettingzooWrap
 
-from supersuit.utils.base_aec_wrapper import PettingzooWrap
 from supersuit.utils.make_defaultdict import make_defaultdict
 from supersuit.utils.wrapper_chooser import WrapperChooser
 
@@ -24,7 +24,7 @@ def reset(self, seed=None, options=None):
         super().reset(seed=seed, options=options)
         self.rewards = {
             agent: self._change_reward_fn(reward)
-            for agent, reward in self.rewards.items()
+            for agent, reward in self.env.rewards.items()  # you don't want to unwrap here, because another reward wrapper might have been applied
         }
         self.__cumulative_rewards = make_defaultdict({a: 0 for a in self.agents})
         self._accumulate_rewards()
@@ -34,7 +34,7 @@ def step(self, action):
         super().step(action)
         self.rewards = {
             agent: self._change_reward_fn(reward)
-            for agent, reward in self.rewards.items()
+            for agent, reward in self.env.rewards.items()  # you don't want to unwrap here, because another reward wrapper might have been applied
         }
         self.__cumulative_rewards[agent] = 0
         self._cumulative_rewards = self.__cumulative_rewards

diff --git a/supersuit/utils/base_aec_wrapper.py b/supersuit/utils/base_aec_wrapper.py
@@ -1,7 +1,7 @@
-from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PettingzooWrap
+from pettingzoo.utils.wrappers import OrderEnforcingWrapper as PZBaseWrapper
 
 
-class BaseWrapper(PettingzooWrap):
+class BaseWrapper(PZBaseWrapper):
     def __init__(self, env):
         """
         Creates a wrapper around `env`. Extend this class to create changes to the space.