diff --git a/.gitignore b/.gitignore index 6bd200b6..7bbcd36e 100644 --- a/.gitignore +++ b/.gitignore @@ -416,6 +416,7 @@ getting_started/venv_310_ray/ grid2op/tests/venv_test_autoclass/ test_eduardo.py grid2op/tests/failed_test* +venv_312 # profiling files **.prof diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1f61008b..82df64ba 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -75,6 +75,8 @@ Next release (wrong sign for the slack generator) - [FIXED] the environment would not load in case of an incorrect "layout.json" instead of raising a warning. +- [FIXED] some issue with gym_compat module for "newest" version of + gymnasium (1.0.0) - [IMPROVED] error message when forecasts are not correctly set-up [1.10.3] - 2024-07-12 diff --git a/docs/gym.rst b/docs/gym.rst index 02e47d79..931bb093 100644 --- a/docs/gym.rst +++ b/docs/gym.rst @@ -29,7 +29,7 @@ your base code. More information on the section :ref:`gymnasium_gym` -Before grid2op 1.2.0 only some classes fully implemented the open AI gym interface: +Before grid2op 1.2.0 only some classes fully implemented the gymnasium interface: - the :class:`grid2op.Environment` (with methods such as `env.reset`, `env.step` etc.) - the :class:`grid2op.Agent` (with the `agent.act` etc.) @@ -37,12 +37,12 @@ Before grid2op 1.2.0 only some classes fully implemented the open AI gym interfa Starting from 1.2.0 we implemented some automatic converters that are able to automatically map -grid2op representation for the action space and the observation space into open AI gym "spaces". More precisely these +grid2op representation for the action space and the observation space into gymnasium "spaces". More precisely these are represented as gym.spaces.Dict. -As of grid2op 1.4.0 we tighten the gap between openAI gym and grid2op by introducing the dedicated module +As of grid2op 1.4.0 we tighten the gap between gymnasium and grid2op by introducing the dedicated module `grid2op.gym_compat` . 
Withing this module there are lots of functionalities to convert a grid2op environment -into a gym environment (that inherit `gym.Env` instead of "simply" implementing the open ai gym interface). +into a gymnasium environment (that inherit `gymnasium.Env` instead of "simply" implementing the gymnasium interface). A simple usage is: @@ -55,12 +55,12 @@ A simple usage is: env_name = "l2rpn_case14_sandbox" # or any other grid2op environment name g2op_env = grid2op.make(env_name) # create the gri2op environment - gym_env = GymEnv(g2op_env) # create the gym environment + gym_env = GymEnv(g2op_env) # create the gymnasium environment - # check that this is a properly defined gym environment: + # check that this is a properly defined gymnasium environment: import gym - print(f"Is gym_env and open AI gym environment: {isinstance(gym_env, gym.Env)}") - # it shows "Is gym_env and open AI gym environment: True" + print(f"Is gym_env a gymnasium environment: {isinstance(gym_env, gym.Env)}") + # it shows "Is gym_env a gymnasium environment: True" .. note:: @@ -73,9 +73,9 @@ A simple usage is: .. warning:: The `gym` package has some breaking API change since its version 0.26. We attempted, in grid2op, to maintain compatibility both with former versions and later ones. This makes **this - class behave differently depending on the version of gym you have installed** ! + class behave differently depending on the version of gymnasium you have installed** ! - The main changes involve the functions `env.step` and `env.reset` (core gym functions) + The main changes involve the functions `env.step` and `env.reset` (core gymnasium functions) This page is organized as follow: @@ -164,7 +164,7 @@ You can transform the observation space as you wish. There are some examples in Default Action space ****************************** -The default action space is also a type of gym Dict. As for the observation space above, it is a +The default action space is also a type of gymnasium Dict. 
As for the observation space above, it is a straight translation from the attribute of the action to the key of the dictionary. This gives: - "change_bus": MultiBinary(`env.dim_topo`) @@ -177,7 +177,7 @@ straight translation from the attribute of the action to the key of the dictiona - "raise_alarm": MultiBinary(`env.dim_alarms`) - "raise_alert": MultiBinary(`env.dim_alerts`) -For example you can create a "gym action" (for the default encoding) like: +For example you can create a "gymnasium action" (for the default encoding) like: .. code-block:: python @@ -191,7 +191,7 @@ For example you can create a "gym action" (for the default encoding) like: gym_env = GymEnv(env) seed = ... - obs, info = gym_env.reset(seed) # for new gym interface + obs, info = gym_env.reset(seed) # for new gymnasium interface # do nothing gym_act = {} @@ -199,19 +199,19 @@ For example you can create a "gym action" (for the default encoding) like: #change the bus of the element 6 and 7 of the "topo_vect" gym_act = {} - gym_act["change_bus"] = np.zeros(env.dim_topo, dtype=np.int8) # gym encoding of a multi binary + gym_act["change_bus"] = np.zeros(env.dim_topo, dtype=np.int8) # gymnasium encoding of a multi binary gym_act["change_bus"][[6, 7]] = 1 obs, reward, done, truncated, info = gym_env.step(gym_act) # redispatch generator 2 of 1.7MW gym_act = {} - gym_act["redispatch"] = np.zeros(env.n_gen, dtype=np.float32) # gym encoding of a Box + gym_act["redispatch"] = np.zeros(env.n_gen, dtype=np.float32) # gymnasium encoding of a Box gym_act["redispatch"][2] = 1.7 obs, reward, done, truncated, info = gym_env.step(gym_act) # set the bus of element 8 and 9 to bus 2 gym_act = {} - gym_act["set_bus"] = np.zeros(env.dim_topo, dtype=int) # gym encoding of a Box + gym_act["set_bus"] = np.zeros(env.dim_topo, dtype=int) # gymnasium encoding of a Box gym_act["set_bus"][[8, 9]] = 2 obs, reward, done, truncated, info = gym_env.step(gym_act) @@ -238,7 +238,7 @@ If you want a full control on this spaces, you 
need to implement something like: env = grid2op.make(env_name) from grid2op.gym_compat import GymEnv - # this of course will not work... Replace "AGymSpace" with a normal gym space, like Dict, Box, MultiDiscrete etc. + # this of course will not work... Replace "AGymSpace" with a normal gymnasium space, like Dict, Box, MultiDiscrete etc. from gym.spaces import AGymSpace gym_env = GymEnv(env) @@ -253,7 +253,7 @@ If you want a full control on this spaces, you need to implement something like: def to_gym(self, observation): # this is this very same function that you need to implement # it should have this exact name, take only one observation (grid2op) as input - # and return a gym object that belong to your space "AGymSpace" + # and return a gymnasium object that belong to your space "AGymSpace" return SomethingThatBelongTo_AGymSpace # eg. return np.concatenate((obs.gen_p * 0.1, np.sqrt(obs.load_p)) @@ -268,7 +268,7 @@ And for the action space: env = grid2op.make(env_name) from grid2op.gym_compat import GymEnv - # this of course will not work... Replace "AGymSpace" with a normal gym space, like Dict, Box, MultiDiscrete etc. + # this of course will not work... Replace "AGymSpace" with a normal gymnasium space, like Dict, Box, MultiDiscrete etc. from gym.spaces import AGymSpace gym_env = GymEnv(env) @@ -282,7 +282,7 @@ And for the action space: def from_gym(self, gym_action): # this is this very same function that you need to implement - # it should have this exact name, take only one action (member of your gym space) as input + # it should have this exact name, take only one action (member of your gymnasium space) as input # and return a grid2op action return TheGymAction_ConvertedTo_Grid2op_Action # eg. 
return np.concatenate((obs.gen_p * 0.1, np.sqrt(obs.load_p)) @@ -311,7 +311,7 @@ and divide input data by `divide`): env_name = "l2rpn_case14_sandbox" # or any other grid2op environment name g2op_env = grid2op.make(env_name) # create the gri2op environment - gym_env = GymEnv(g2op_env) # create the gym environment + gym_env = GymEnv(g2op_env) # create the gymnasium environment ob_space = gym_env.observation_space ob_space = ob_space.reencode_space("actual_dispatch", @@ -336,7 +336,7 @@ the log of the loads instead of giving the direct value to your agent. This can env_name = "l2rpn_case14_sandbox" # or any other grid2op environment name g2op_env = grid2op.make(env_name) # create the gri2op environment - gym_env = GymEnv(g2op_env) # create the gym environment + gym_env = GymEnv(g2op_env) # create the gymnasium environment ob_space = gym_env.observation_space shape_ = (g2op_env.n_load, ) @@ -350,7 +350,7 @@ the log of the loads instead of giving the direct value to your agent. This can ) gym_env.observation_space = ob_space - # and now you will get the key "log_load" as part of your gym observation. + # and now you will get the key "log_load" as part of your gymnasium observation. A detailed list of such "converter" is documented on the section "Detailed Documentation by class". 
In the table below we describe some of them (**nb** if you notice a converter is not displayed there, @@ -360,11 +360,11 @@ do not hesitate to write us a "feature request" for the documentation, thanks in Converter name Objective ============================================= ============================================================ :class:`ContinuousToDiscreteConverter` Convert a continuous space into a discrete one -:class:`MultiToTupleConverter` Convert a gym MultiBinary to a gym Tuple of gym Binary and a gym MultiDiscrete to a Tuple of Discrete +:class:`MultiToTupleConverter` Convert a gymnasium MultiBinary to a gymnasium Tuple of gymnasium Binary and a gymnasium MultiDiscrete to a Tuple of Discrete :class:`ScalerAttrConverter` Allows to scale (divide an attribute by something and subtract something from it) -`BaseGymSpaceConverter.add_key`_ Allows you to compute another "part" of the observation space (you add an information to the gym space) +`BaseGymSpaceConverter.add_key`_ Allows you to compute another "part" of the observation space (you add an information to the gymnasium space) `BaseGymSpaceConverter.keep_only_attr`_ Allows you to specify which part of the action / observation you want to keep -`BaseGymSpaceConverter.ignore_attr`_ Allows you to ignore some attributes of the action / observation (they will not be part of the gym space) +`BaseGymSpaceConverter.ignore_attr`_ Allows you to ignore some attributes of the action / observation (they will not be part of the gymnasium space) ============================================= ============================================================ .. warning:: @@ -383,7 +383,7 @@ Converter name Objective .. note:: With the "converters" above, note that the observation space AND action space will still - inherit from gym Dict. + inherit from gymnasium Dict. They are complex spaces that are not well handled by some RL framework. 
@@ -395,19 +395,19 @@ Converter name Objective Customizing the action and observation space, into Box or Discrete ******************************************************************* -The use of the converter above is nice if you can work with gym Dict, but in some cases, or for some frameworks +The use of the converter above is nice if you can work with gymnasium Dict, but in some cases, or for some frameworks it is not convenient to do it at all. -TO alleviate this problem, we developed 4 types of gym action space, following the architecture +TO alleviate this problem, we developed 4 types of gymnasium action space, following the architecture detailed in subsection :ref:`base_gym_space_function` =============================== ============================================================ Converter name Objective =============================== ============================================================ :class:`BoxGymObsSpace` Convert the observation space to a single "Box" -:class:`BoxGymActSpace` Convert a gym MultiBinary to a gym Tuple of gym Binary and a gym MultiDiscrete to a Tuple of Discrete +:class:`BoxGymActSpace` Convert a gymnasium MultiBinary to a gymnasium Tuple of gymnasium Binary and a gymnasium MultiDiscrete to a Tuple of Discrete :class:`MultiDiscreteActSpace` Allows to scale (divide an attribute by something and subtract something from it) -:class:`DiscreteActSpace` Allows you to compute another "part" of the observation space (you add an information to the gym space) +:class:`DiscreteActSpace` Allows you to compute another "part" of the observation space (you add an information to the gymnasium space) =============================== ============================================================ They can all be used like: diff --git a/docs/makeenv.rst b/docs/makeenv.rst index 55184f7a..493818c6 100644 --- a/docs/makeenv.rst +++ b/docs/makeenv.rst @@ -25,11 +25,11 @@ To get started with such an environment, you can simply do: You can consult the 
different notebooks in the `getting_stared` directory of this package for more information on how to use it. -Created Environment should behave exactly like a gym environment. If you notice any unwanted behavior, please address +Created Environment should behave exactly like a gymnasium environment. If you notice any unwanted behavior, please address an issue in the official grid2op repository: `Grid2Op `_ -The environment created with this method should be fully compatible with the gym framework: if you are developing -a new algorithm of "Reinforcement Learning" and you used the openai gym framework to do so, you can port your code +The environment created with this method should be fully compatible with the gymnasium framework: if you are developing +a new algorithm of "Reinforcement Learning" and you used the gymnasium framework to do so, you can port your code in a few minutes (basically this consists in adapting the input and output dimension of your BaseAgent) and make it work with a Grid2Op environment. An example of such modifications is exposed in the getting_started/ notebooks. diff --git a/docs/model_free.rst b/docs/model_free.rst index 94f8f745..10424d7c 100644 --- a/docs/model_free.rst +++ b/docs/model_free.rst @@ -8,7 +8,7 @@ Model Free Reinforcement Learning See some example in "l2rpn-baselines" package for now ! -The main idea is first to convert the grid2op environment to a gym environment, for example using :ref:`openai-gym`. +The main idea is first to convert the grid2op environment to a gymnasium environment, for example using :ref:`openai-gym`. 
And then use some libaries available, for example `Stable Baselines `_ or `RLLIB `_ diff --git a/docs/plot.rst b/docs/plot.rst index 25058cf4..ab7f6f93 100644 --- a/docs/plot.rst +++ b/docs/plot.rst @@ -76,10 +76,10 @@ An possible output will look like this: Render the state of the grid ----------------------------- -During the gym loop -++++++++++++++++++++ +During the gymnasium loop +++++++++++++++++++++++++++ In Grid2Op we also made available the possibility to render the state of the grid that your agent sees before taking -an action. This can be done with the provided environments following openAI gym interface like this: +an action. This can be done with the provided environments following gymnasium interface like this: .. code-block:: python @@ -104,7 +104,7 @@ significantly. Offline, after the scenarios were played ++++++++++++++++++++++++++++++++++++++++ -In Grid2Op, you can execute a :ref:`runner-module` to perform the "gym loops" and store the results +In Grid2Op, you can execute a :ref:`runner-module` to perform the "gymnasium loops" and store the results in a standardized manner. Once stored, the results can be loaded back and "replayed" using the appropriate class. Here is how you can do this: diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 3955b818..54309452 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -88,7 +88,7 @@ that are available, without any installation thanks to `Binder `_ . Feel free to visit the "getting_started" page for more information and a detailed tour about the issue that grid2op tries to address. -The most basic code, for those familiar with openAI gym (a well-known framework in reinforcement learning) is: +The most basic code, for those familiar with gymnasium (a well-known framework in reinforcement learning) is: .. 
code-block:: python @@ -101,7 +101,7 @@ The most basic code, for those familiar with openAI gym (a well-known framework from grid2op.Agent import RandomAgent my_agent = RandomAgent(env.action_space) - # proceed as you would any open ai gym loop + # proceed as you would any gymnasium loop nb_episode = 10 for _ in range(nb_episode): # you perform in this case 10 different episodes @@ -115,9 +115,9 @@ The most basic code, for those familiar with openAI gym (a well-known framework act = my_agent.act(obs, reward, done) obs, reward, done, info = env.step(act) -.. warning:: Grid2Op environments implements the interface of defined by openAI gym environment, but they don't - inherit from them. You can use the Grid2Op environment as you would any Gym environment but they are - not strictly speaking gym environment. +.. warning:: Grid2Op environments implement the interface defined by gymnasium environments, but they don't + inherit from them. You can use the Grid2Op environment as you would any gymnasium environment but they are + not strictly speaking gymnasium environments. To make the use of grid2op alongside grid2op environment easier, we developed a module described in :ref:`openai-gym`. diff --git a/docs/user/environment.rst b/docs/user/environment.rst index 3b4af59c..5c1c9613 100644 --- a/docs/user/environment.rst +++ b/docs/user/environment.rst @@ -32,7 +32,7 @@ In this section we present some way to use the :class:`Environment` class. Basic Usage ++++++++++++ -This example is adapted from gym documentation available at +This example is adapted from gymnasium documentation available at `gym random_agent.py `_ ): .. 
code-block:: python diff --git a/docs/user/runner.rst b/docs/user/runner.rst index 2752971c..8f96ffaf 100644 --- a/docs/user/runner.rst +++ b/docs/user/runner.rst @@ -13,7 +13,7 @@ Objectives The runner class aims at: i) facilitate the evaluation of the performance of :class:`grid2op.Agent` by performing automatically the - "open ai gym loop" (see below) + "gymnasium loop" (see below) ii) define a format to store the results of the evaluation of such agent in a standardized manner iii) this "agent logs" can then be re read by third party applications, such as `grid2viz `_ or by internal class to ease the study of the behaviour of @@ -21,7 +21,7 @@ iii) this "agent logs" can then be re read by third party applications, such as :class:`grid2op.Episode.EpisodeReplay` iv) allow easy use of parallelization of this assessment. -Basically, the runner simplifies the assessment of the performance of some agent. This is the "usual" gym code to run +Basically, the runner simplifies the assessment of the performance of some agent. This is the "usual" gymnasium code to run an agent: .. code-block:: python diff --git a/grid2op/Chronics/multiFolder.py b/grid2op/Chronics/multiFolder.py index e8b8c9b4..9e71b8da 100644 --- a/grid2op/Chronics/multiFolder.py +++ b/grid2op/Chronics/multiFolder.py @@ -357,7 +357,7 @@ def sample_next_chronics(self, probabilities=None): id_sel = (self._order == selected).nonzero()[0] self._prev_cache_id = selected - 1 return id_sel - + def reset(self): """ Rebuilt the :attr:`Multifolder._order`. This should be called after a call to :func:`Multifolder.set_filter` diff --git a/grid2op/Converter/IdToAct.py b/grid2op/Converter/IdToAct.py index 063b1f59..1adf80dc 100644 --- a/grid2op/Converter/IdToAct.py +++ b/grid2op/Converter/IdToAct.py @@ -274,7 +274,16 @@ def init_converter(self, all_actions=None, **kwargs): "grid2op action. 
The error was:\n{}".format(e) ) from exc_ else: - raise RuntimeError("Impossible to load the action provided.") + # first make sure that all action is "correct" + try: + nb = len(all_actions) # assert I can compute the "len" + for i in range(nb): + act = all_actions[i] # assert I can use the `[]` operator + assert isinstance(act, BaseAction) # assert what's in there is a BaseAction + except Exception as exc_: + raise RuntimeError("Impossible to load the action provided.") from exc_ + # does not copy here (to save memory in case of shared memory setting) + self.all_actions = all_actions self.n = len(self.all_actions) def filter_action(self, filtering_fun): diff --git a/grid2op/gym_compat/__init__.py b/grid2op/gym_compat/__init__.py index 0672745d..71ff5a96 100644 --- a/grid2op/gym_compat/__init__.py +++ b/grid2op/gym_compat/__init__.py @@ -25,7 +25,7 @@ if GYM_AVAILABLE is False and GYMNASIUM_AVAILABLE is False: raise ImportError("Neither gymnasium nor gym are installed. The `grid2op.gym_compat` module cannot be used.") -# base for all gym converter +# base for all gymnasium / gym converter from grid2op.gym_compat.base_gym_attr_converter import BaseGymAttrConverter if GYMNASIUM_AVAILABLE: from grid2op.gym_compat.base_gym_attr_converter import BaseGymnasiumAttrConverter diff --git a/grid2op/gym_compat/base_gym_attr_converter.py b/grid2op/gym_compat/base_gym_attr_converter.py index b56264b6..9ffd443e 100644 --- a/grid2op/gym_compat/base_gym_attr_converter.py +++ b/grid2op/gym_compat/base_gym_attr_converter.py @@ -72,12 +72,12 @@ def initialize_space(self, space): def gym_to_g2op(self, gym_object): """ - Convert a gym object to a grid2op object + Convert a gymnasium object to a grid2op object Parameters ---------- gym_object: - An object (action or observation) represented as a gym "ordered dictionary" + An object (action or observation) represented as a gymnasium "dictionary" Returns ------- @@ -86,13 +86,13 @@ def gym_to_g2op(self, gym_object): """ if 
self._my_gym_to_g2op is None: raise NotImplementedError( - "Unable to convert gym object to grid2op object with this converter" + "Unable to convert gymnasium object to grid2op object with this converter" ) return self._my_gym_to_g2op(gym_object) def g2op_to_gym(self, g2op_object): """ - Convert a gym object to a grid2op object + Convert a grid2op object to a gymnasium object Parameters ---------- @@ -102,12 +102,12 @@ def g2op_to_gym(self, g2op_object): Returns ------- - The same object, represented as a gym "ordered dictionary" + The same object, represented as a gymnasium "dictionary" """ if self._my_g2op_to_gym is None: raise NotImplementedError( - "Unable to convert grid2op object to gym object with this converter" + "Unable to convert grid2op object to gymnasium object with this converter" ) return self._my_g2op_to_gym(g2op_object) diff --git a/grid2op/gym_compat/box_gym_actspace.py b/grid2op/gym_compat/box_gym_actspace.py index 0516fcf7..3dd4ab98 100644 --- a/grid2op/gym_compat/box_gym_actspace.py +++ b/grid2op/gym_compat/box_gym_actspace.py @@ -43,7 +43,7 @@ class __AuxBoxGymActSpace: """ - This class allows to convert a grid2op action space into a gym "Box" which is + This class allows to convert a grid2op action space into a gymnasium "Box" which is a regular Box in R^d. It also allows to customize which part of the action you want to use and offer capacity to @@ -54,7 +54,7 @@ class __AuxBoxGymActSpace: this is not recommended at all to use it for discrete attribute (set_bus, change_bus, set_line_status or change_line_status) ! - Basically, when doing action in gym for these attributes, this converter will involve rounding and + Basically, when doing action in gymnasium for these attributes, this converter will involve rounding and is definitely not the best representation. Prefer the :class:`MultiDiscreteActSpace` or the :class:`DiscreteActSpace` classes. 
@@ -136,7 +136,7 @@ class __AuxBoxGymActSpace: gym_env = GymEnv(env) gym_env.action_space = BoxGymActSpace(env.action_space) - obs = gym_env.reset() # obs will be an OrderedDict (default, but you can customize it) + obs = gym_env.reset() # obs will be an Dict (default, but you can customize it) # you can do a "do nothing" action act = np.zeros(gym_env.action_space.shape) @@ -174,7 +174,7 @@ class __AuxBoxGymActSpace: env = grid2op.make(env_name) from grid2op.gym_compat import GymEnv - # this of course will not work... Replace "AGymSpace" with a normal gym space, like Dict, Box, MultiDiscrete etc. + # this of course will not work... Replace "AGymSpace" with a normal gymnasium space, like Dict, Box, MultiDiscrete etc. from gym.spaces import AGymSpace gym_env = GymEnv(env) @@ -188,7 +188,7 @@ def __init__(self, whatever, you, want): def from_gym(self, gym_action): # this is this very same function that you need to implement - # it should have this exact name, take only one action (member of your gym space) as input + # it should have this exact name, take only one action (member of your gymnasium space) as input # and return a grid2op action return TheGymAction_ConvertedTo_Grid2op_Action # eg. return np.concatenate((obs.gen_p * 0.1, np.sqrt(obs.load_p)) @@ -461,7 +461,7 @@ def _get_info(self, functs): shape = (shape[0] + shape_[0],) # handle low / high - # NB: the formula is: glop = gym * multiply + add + # NB: the formula is: glop = gymnasium * multiply + add if el in self._add: low_ = 1.0 * low_.astype(dtype) high_ = 1.0 * high_.astype(dtype) @@ -543,7 +543,7 @@ def _handle_attribute(self, res, gym_act_this, attr_nm): return res def get_indexes(self, key: POSSIBLE_KEYS) -> Tuple[int, int]: - """Allows to retrieve the indexes of the gym action that + """Allows to retrieve the indexes of the gymnasium action that are concerned by the attribute name `key` given in input. 
Parameters @@ -587,14 +587,14 @@ def get_indexes(self, key: POSSIBLE_KEYS) -> Tuple[int, int]: def from_gym(self, gym_act: np.ndarray) -> BaseAction: """ - This is the function that is called to transform a gym action (in this case a numpy array!) + This is the function that is called to transform a gymnasium action (in this case a numpy array!) sent by the agent and convert it to a grid2op action that will be sent to the underlying grid2op environment. Parameters ---------- gym_act: ``numpy.ndarray`` - the gym action + the gymnasium action Returns ------- diff --git a/grid2op/gym_compat/box_gym_obsspace.py b/grid2op/gym_compat/box_gym_obsspace.py index eefe7189..298488cb 100644 --- a/grid2op/gym_compat/box_gym_obsspace.py +++ b/grid2op/gym_compat/box_gym_obsspace.py @@ -87,7 +87,7 @@ class __AuxBoxGymObsSpace: """ - This class allows to convert a grid2op observation space into a gym "Box" which is + This class allows to convert a grid2op observation space into a gymnasium "Box" which is a regular Box in R^d. It also allows to customize which part of the observation you want to use and offer capacity to @@ -138,7 +138,7 @@ class __AuxBoxGymObsSpace: attr_to_keep=['load_p', "gen_p", "rho]) You can also apply some basic transformation to the attribute of the observation before building - the resulting gym observation (which in this case is a vector). This can be done with: + the resulting gymnasium observation (which in this case is a vector). This can be done with: .. 
code-block:: python @@ -788,7 +788,7 @@ def _handle_attribute(self, grid2op_observation, attr_nm): def to_gym(self, grid2op_observation): """ This is the function that is called to transform a grid2Op observation, sent by the grid2op environment - and convert it to a numpy array (an element of a gym Box) + and convert it to a numpy array (an element of a gymnasium Box) Parameters ---------- @@ -798,7 +798,7 @@ def to_gym(self, grid2op_observation): Returns ------- res: :class:`numpy.ndarray` - A numpy array compatible with the openAI gym Box that represents the action space. + A numpy array compatible with the gymnasium Box that represents the observation space. """ res = np.empty(shape=self.shape, dtype=self.dtype) @@ -818,7 +818,7 @@ def close(self): pass def get_indexes(self, key: str) -> Tuple[int, int]: - """Allows to retrieve the indexes of the gym action that + """Allows to retrieve the indexes of the gymnasium observation that are concerned by the attribute name `key` given in input. .. versionadded:: 1.9.3 diff --git a/grid2op/gym_compat/continuous_to_discrete.py b/grid2op/gym_compat/continuous_to_discrete.py index f27ba60c..3c4de285 100644 --- a/grid2op/gym_compat/continuous_to_discrete.py +++ b/grid2op/gym_compat/continuous_to_discrete.py @@ -97,7 +97,7 @@ def __init__(self, nb_bins, init_space=None): def initialize_space(self, init_space): if not isinstance(init_space, type(self)._BoxType): raise RuntimeError( - "Impossible to convert a gym space of type {} to a discrete space" + "Impossible to convert a gymnasium space of type {} to a discrete space" " (it should be of " "type space.Box)" "".format(type(init_space)) diff --git a/grid2op/gym_compat/discrete_gym_actspace.py b/grid2op/gym_compat/discrete_gym_actspace.py index 4e89c448..d7c5fe6b 100644 --- a/grid2op/gym_compat/discrete_gym_actspace.py +++ b/grid2op/gym_compat/discrete_gym_actspace.py @@ -24,10 +24,10 @@ class __AuxDiscreteActSpace: """ TODO the documentation of this class is in progress. 
- This class allows to convert a grid2op action space into a gym "Discrete". This means that the action are + This class allows to convert a grid2op action space into a gymnasium "Discrete". This means that the action are labeled, and instead of describing the action itself, you provide only its ID. - Let's take an example of line disconnection. In the "standard" gym representation you need to: + Let's take an example of line disconnection. In the "standard" gymnasium representation you need to: .. code-block:: python @@ -113,7 +113,7 @@ class __AuxDiscreteActSpace: .. note:: This class is really closely related to the :class:`grid2op.Converter.IdToAct`. It basically "maps" - this "IdToAct" into a type of gym space, which, in this case, will be a `Discrete` one. + this "IdToAct" into a type of gymnasium space, which, in this case, will be a `Discrete` one. .. note:: By default, the "do nothing" action is encoded by the integer '0'. @@ -322,14 +322,14 @@ def _get_info(self): def from_gym(self, gym_act: int) -> BaseAction: """ - This is the function that is called to transform a gym action (in this case a numpy array!) + This is the function that is called to transform a gymnasium action (in this case a numpy array!) sent by the agent and convert it to a grid2op action that will be sent to the underlying grid2op environment. Parameters ---------- gym_act: ``int`` - the gym action (a single integer for this action space) + the gymnasium action (a single integer for this action space) Returns ------- diff --git a/grid2op/gym_compat/gym_act_space.py b/grid2op/gym_compat/gym_act_space.py index 984de412..c0dd4643 100644 --- a/grid2op/gym_compat/gym_act_space.py +++ b/grid2op/gym_compat/gym_act_space.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: MPL-2.0 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. 
-from collections import OrderedDict import warnings import numpy as np @@ -18,20 +17,20 @@ from grid2op.Action import BaseAction, ActionSpace from grid2op.dtypes import dt_int, dt_bool, dt_float from grid2op.Converter.Converters import Converter -from grid2op.gym_compat.utils import GYM_AVAILABLE, GYMNASIUM_AVAILABLE, ActType +from grid2op.gym_compat.utils import GYM_AVAILABLE, GYMNASIUM_AVAILABLE, DictType class __AuxGymActionSpace: """ - This class enables the conversion of the action space into a gym "space". + This class enables the conversion of the action space into a gymnasium "space". Resulting action space will be a :class:`gym.spaces.Dict`. - **NB** it is NOT recommended to use the sample of the gym action space. Please use the sampling ( + **NB** it is NOT recommended to use the sample of the gymnasium action space. Please use the sampling ( if availabe) of the original action space instead [if not available this means there is no implemented way to generate reliable random action] - **Note** that gym space converted with this class should be seeded independently. It is NOT seeded + **Note** that gymnasium space converted with this class should be seeded independently. It is NOT seeded when calling :func:`grid2op.Environment.Environment.seed`. .. warning:: @@ -51,7 +50,7 @@ class __AuxGymActionSpace: See :ref:`gymnasium_gym` for more information .. note:: - A gymnasium Dict is encoded as a OrderedDict (`from collection import OrderedDict`) + A gymnasium Dict can be encoded as a OrderedDict (`from collection import OrderedDict`) see the example section for more information. 
Examples @@ -69,7 +68,7 @@ class __AuxGymActionSpace: env = grid2op.make(env_name) gym_env = GymEnv(env) - obs = gym_env.reset() # obs will be an OrderedDict (default, but you can customize it) + obs = gym_env.reset() # obs will be an Dict (default, but you can customize it) # is equivalent to "do nothing" act = {} @@ -151,7 +150,7 @@ def __init__(self, env, converter=None, dict_variables=None): # TODO Make sure it works well ! if converter is not None and isinstance(converter, Converter): - # a converter allows to ... convert the data so they have specific gym space + # a converter allows to ... convert the data so they have specific gymnasium space # self.initial_act_space = converter self._converter = converter self._template_act = converter.init_action_space() @@ -159,7 +158,7 @@ def __init__(self, env, converter=None, dict_variables=None): self.__is_converter = True elif converter is not None: raise RuntimeError( - 'Impossible to initialize a gym action space with a converter of type "{}" ' + 'Impossible to initialize a gymnasium action space with a converter of type "{}" ' "A converter should inherit from grid2op.Converter".format( type(converter) ) @@ -246,7 +245,7 @@ def _fill_dict_act_space(self, dict_, dict_variables): self._template_act.dtypes() ): if sh == 0: - # do not add "empty" (=0 dimension) arrays to gym otherwise it crashes + # do not add "empty" (=0 dimension) arrays to gymnasium otherwise it crashes continue my_type = None shape = (sh,) @@ -312,14 +311,14 @@ def _fix_dict_keys(self, dict_: dict) -> dict: res[self.keys_grid2op_2_human[k]] = v return res - def from_gym(self, gymlike_action: OrderedDict) -> object: + def from_gym(self, gymlike_action: DictType) -> object: """ Transform a gym-like action (such as the output of "sample()") into a grid2op action Parameters ---------- - gymlike_action: :class:`gym.spaces.dict.OrderedDict` - The action, represented as a gym action (ordered dict) + gymlike_action: :class:`gym.spaces.dict.Dict` + The 
action, represented as a gymnasium action (ordered dict) Returns ------- @@ -343,9 +342,9 @@ def from_gym(self, gymlike_action: OrderedDict) -> object: res._assign_attr_from_name(internal_k, tmp) return res - def to_gym(self, action: object) -> OrderedDict: + def to_gym(self, action: object) -> DictType: """ - Transform an action (non gym) into an action compatible with the gym Space. + Transform an action (non gymnasium) into an action compatible with the gymnasium Space. Parameters ---------- @@ -355,7 +354,7 @@ def to_gym(self, action: object) -> OrderedDict: Returns ------- gym_action: - The same action converted as a OrderedDict (default used by gym in case of action space + The same action converted as a Dict (default used by gymnasium in case of action space being Dict) """ diff --git a/grid2op/gym_compat/gym_obs_space.py b/grid2op/gym_compat/gym_obs_space.py index 170435d0..e1776d2b 100644 --- a/grid2op/gym_compat/gym_obs_space.py +++ b/grid2op/gym_compat/gym_obs_space.py @@ -16,26 +16,22 @@ BaseMultiProcessEnvironment, ) from grid2op.gym_compat.utils import GYM_AVAILABLE, GYMNASIUM_AVAILABLE -if GYMNASIUM_AVAILABLE: - from gymnasium import spaces # only used for type hints -elif GYM_AVAILABLE: - from gym import spaces from grid2op.Observation import BaseObservation from grid2op.dtypes import dt_int, dt_bool, dt_float -from grid2op.gym_compat.utils import _compute_extra_power_for_losses +from grid2op.gym_compat.utils import _compute_extra_power_for_losses, DictType class __AuxGymObservationSpace: """ TODO explain gym / gymnasium - This class allows to transform the observation space into a gym space. + This class allows to transform the observation space into a gymnasium space. - Gym space will be a :class:`gym.spaces.Dict` with the keys being the different attributes + Gymnasium space will be a :class:`gym.spaces.Dict` with the keys being the different attributes of the grid2op observation. All attributes are used. 
- Note that gym space converted with this class should be seeded independently. It is NOT seeded + Note that gymnasium space converted with this class should be seeded independently. It is NOT seeded when calling :func:`grid2op.Environment.Environment.seed`. .. warning:: @@ -65,7 +61,7 @@ class __AuxGymObservationSpace: env = grid2op.make("l2rpn_case14_sandbox") gym_observation_space = GymObservationSpace(env.observation_space) - # and now gym_observation_space is a `gym.spaces.Dict` representing the observation space + # and now gym_observation_space is a `gymnasium.spaces.dict.Dict` representing the observation space # you can "convert" the grid2op observation to / from this space with: @@ -74,7 +70,7 @@ class __AuxGymObservationSpace: # the conversion from gym_obs to grid2op obs is feasible, but i don't imagine # a situation where it is useful. And especially, you will not be able to - # use "obs.simulate" for the observation converted back from this gym action. + # use "obs.simulate" for the observation converted back from this gymnasium action. 
Notes ----- @@ -123,12 +119,12 @@ def __init__(self, env, dict_variables=None): self._env_params = env._env_params self._opp_attack_max_duration = env._opp_attack_max_duration else: - raise RuntimeError("Unknown way to build a gym observation space") + raise RuntimeError("Unknown way to build a gymnasium observation space") - dict_ = {} # will represent the gym.Dict space + dict_ = {} # will represent the gymnasium.Dict space if dict_variables is None: - # get the extra variables in the gym space I want to get + # get the extra variables in the gymnasium space I want to get dict_variables = { "thermal_limit": type(self)._BoxType( @@ -256,7 +252,7 @@ def _fill_dict_obs_space( self.initial_obs.dtypes(), ): if sh == 0: - # do not add "empty" (=0 dimension) arrays to gym otherwise it crashes + # do not add "empty" (=0 dimension) arrays to gymnasium otherwise it crashes continue if (attr_nm in dict_ or @@ -411,14 +407,14 @@ def _fill_dict_obs_space( my_type = self._generic_gym_space(dt, sh) dict_[attr_nm] = my_type - def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservation: + def from_gym(self, gymlike_observation: DictType) -> BaseObservation: """ This function convert the gym-like representation of an observation to a grid2op observation. Parameters ---------- - gymlike_observation: :class:`gym.spaces.dict.OrderedDict` - The observation represented as a gym ordered dict + gymlike_observation: :class:`gymnasium.spaces.dict.Dict` + The observation represented as a gymnasium dict Returns ------- @@ -434,9 +430,9 @@ def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservat f"This key is ignored.") return res - def to_gym(self, grid2op_observation: BaseObservation) -> spaces.dict.OrderedDict: + def to_gym(self, grid2op_observation: BaseObservation) -> DictType: """ - Convert a grid2op observation into a gym ordered dict. + Convert a grid2op observation into a gymnasium Dict. 
Parameters ---------- @@ -445,8 +441,8 @@ def to_gym(self, grid2op_observation: BaseObservation) -> spaces.dict.OrderedDic Returns ------- - gymlike_observation: :class:`gym.spaces.dict.OrderedDict` - The corresponding gym ordered dict + gymlike_observation: :class:`gymnasium.spaces.dict.Dict` + The corresponding gymnasium dict """ return self._base_to_gym( diff --git a/grid2op/gym_compat/gym_space_converter.py b/grid2op/gym_compat/gym_space_converter.py index 6e9953d2..5aa7d509 100644 --- a/grid2op/gym_compat/gym_space_converter.py +++ b/grid2op/gym_compat/gym_space_converter.py @@ -20,7 +20,7 @@ class __AuxBaseGymSpaceConverter: INTERNAL .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ - Used as a base class to convert grid2op state to gym state (wrapper for some useful function + Used as a base class to convert grid2op state to gymnasium state (wrapper for some useful function for both the action space and the observation space). .. warning:: @@ -107,7 +107,7 @@ def _extract_obj_grid2op(vect, dtype, key): return res def _base_to_gym(self, keys, obj, dtypes, converter=None): - """convert the obj (grid2op object) into a gym observation / action space""" + """convert the obj (grid2op object) into a gymnasium observation / action space""" res = OrderedDict() for k in keys: if k in self.__func: @@ -139,7 +139,7 @@ def _base_to_gym(self, keys, obj, dtypes, converter=None): def add_key(self, key_name, function, return_type): """ - Allows to add arbitrary function to the representation, as a gym environment of + Allows to add arbitrary function to the representation, as a gymnasium environment of the action space of the observation space. TODO @@ -165,7 +165,7 @@ def add_key(self, key_name, function, return_type): Examples --------- In the example below, we explain how to add the "connectivity_matrix" as part of the observation space - (when converted to gym). The new key "connectivity matrix" will be added to the gym observation. 
+ (when converted to gym). The new key "connectivity matrix" will be added to the gymnasium observation. .. code-block:: python @@ -181,7 +181,7 @@ def add_key(self, key_name, function, return_type): from grid2op.gym_compat import GymEnv env_gym = GymEnv(env_glop) - # default gym environment, the connectivity matrix is not computed + # default gymnasium environment, the connectivity matrix is not computed obs_gym = env_gym.reset() print(f"Is the connectivity matrix part of the observation in gym: {'connectivity_matrix' in obs_gym}") @@ -266,7 +266,11 @@ def seed(self, seed=None): of openAI gym """ seeds = super(type(self)._DictType, self).seed(seed) - sub_seeds = seeds + if isinstance(seeds, (int, dt_int)): + # newer gymansium version returns int and not a list + sub_seeds = [seeds] + else: + sub_seeds = seeds max_ = np.iinfo(dt_int).max for i, space_key in enumerate(sorted(self.spaces.keys())): sub_seed = sample_seed(max_, self.np_random) diff --git a/grid2op/gym_compat/gymenv.py b/grid2op/gym_compat/gymenv.py index db6c59a4..9b054dc4 100644 --- a/grid2op/gym_compat/gymenv.py +++ b/grid2op/gym_compat/gymenv.py @@ -34,8 +34,8 @@ def decorator(func): class __AuxGymEnv(Generic[ObsType, ActType]): """ - fully implements the openAI gym API by using the :class:`GymActionSpace` and :class:`GymObservationSpace` - for compliance with openAI gym. + fully implements the gymnasium API by using the :class:`GymActionSpace` and :class:`GymObservationSpace` + for compliance with gymnasium. They can handle action_space_converter or observation_space converter to change the representation of data that will be fed to the agent. #TODO @@ -43,7 +43,7 @@ class __AuxGymEnv(Generic[ObsType, ActType]): .. warning:: The `gym` package has some breaking API change since its version 0.26. Depending on the version installed, we attempted, in grid2op, to maintain compatibility both with former version and later one. 
This makes this - class behave differently depending on the version of gym you have installed ! + class behave differently depending on the version of gymnasium / gym you have installed ! The main changes involve the functions `env.step` and `env.reset` @@ -72,7 +72,7 @@ class behave differently depending on the version of gym you have installed ! Notes ------ - The environment passed as input is copied. It is not modified by this "gym environment" + The environment passed as input is copied. It is not modified by this "gymnasium environment" Examples -------- @@ -85,7 +85,7 @@ class behave differently depending on the version of gym you have installed ! env_name = "l2rpn_case14_sandbox" # or any other name env = grid2op.make(env_name) - gym_env = GymEnv(env) # is a gym environment properly inheriting from gym.Env ! + gym_env = GymEnv(env) # is a gymnasium environment properly inheriting from gym.Env ! There are a few difference between "raw" grid2op environment and gymnasium environments. @@ -101,7 +101,7 @@ class behave differently depending on the version of gym you have installed ! In gym, there are no specific representations of the action class. More precisely, for each action type (:class:`MultiDiscreteActSpace`, :class:`DiscreteActSpace`, :class:`BoxGymActSpace` or :class:`GymActionSpace`) there is a way to encode it. 
For example, by default (:class:`GymActionSpace`) - an action is represented through an OrderedDict (`from collection import OrderedDict`) + an action is represented through an Dict (`from collection import OrderedDict`) """ def __init__(self, @@ -132,7 +132,7 @@ def __init__(self, super().__init__() # super should reference either gym.Env or gymnasium.Env if not hasattr(self, "_np_random"): - # for older version of gym it does not exist + # for older version of gymnasium it does not exist self._np_random = np.random.RandomState() def _aux_step(self, gym_action: ActType) -> Tuple[ObsType, float, bool, STEP_INFO_TYPING]: @@ -149,6 +149,8 @@ def _aux_step_new(self, gym_action: ActType) -> Tuple[ObsType, float, bool, bool g2op_obs, reward, terminated, info = self.init_env.step(g2op_act) gym_obs = self.observation_space.to_gym(g2op_obs) truncated = False # see https://github.com/openai/gym/pull/2752 + if "exception" in info: + info["exception"] = [str(el) for el in info["exception"]] return gym_obs, float(reward), terminated, truncated, info def _aux_reset(self, @@ -181,16 +183,17 @@ def _aux_reset(self, def _aux_reset_new(self, seed: Optional[int]=None, options: RESET_OPTIONS_TYPING=None) -> Tuple[ObsType,RESET_INFO_GYM_TYPING]: - # used for gym > 0.26 - if (self._shuffle_chronics and - isinstance(self.init_env.chronics_handler.real_data, Multifolder) and - (options is not None and _TIME_SERIE_ID not in options)): - self.init_env.chronics_handler.sample_next_chronics() super().reset(seed=seed) # seed gymnasium env if seed is not None: self._aux_seed_spaces() seed, next_seed, underlying_env_seeds = self._aux_seed_g2op(seed) + + # used for gym > 0.26 + if (self._shuffle_chronics and + isinstance(self.init_env.chronics_handler.real_data, Multifolder) and + (not (options is not None and _TIME_SERIE_ID in options))): + self.init_env.chronics_handler.sample_next_chronics() # we don't seed grid2op with reset as it is done # earlier @@ -206,7 +209,7 @@ def 
_aux_reset_new(self, return gym_obs, info def render(self): - """for compatibility with open ai gym render function""" + """for compatibility with open ai gymnasium render function""" return self.init_env.render() def close(self) -> None: diff --git a/grid2op/gym_compat/legacy/__init__.py b/grid2op/gym_compat/legacy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/grid2op/gym_compat/multi_to_tuple_converter.py b/grid2op/gym_compat/multi_to_tuple_converter.py index 7980608e..444bbe07 100644 --- a/grid2op/gym_compat/multi_to_tuple_converter.py +++ b/grid2op/gym_compat/multi_to_tuple_converter.py @@ -63,7 +63,7 @@ class __AuxMultiToTupleConverter: We choose to encode some variable using `MultiBinary` variable in grid2op. This allows for easy manipulation of them if using these frameworks. - MultiBinary are encoded with gym Tuple of gym Discrete variables. + MultiBinary are encoded with gymnasium Tuple of gymnasium Discrete variables. .. warning:: Depending on the presence absence of gymnasium and gym packages this class might behave differently. @@ -113,7 +113,7 @@ def initialize_space(self, init_space): ) else: raise RuntimeError( - "Impossible to convert a gym space of type {} to a Tuple (it should be of " + "Impossible to convert a gymnasium space of type {} to a Tuple (it should be of " "type space.MultiBinary or space.MultiDiscrete)" "".format(type(init_space)) ) diff --git a/grid2op/gym_compat/multidiscrete_gym_actspace.py b/grid2op/gym_compat/multidiscrete_gym_actspace.py index 60999fd9..1ba4d37f 100644 --- a/grid2op/gym_compat/multidiscrete_gym_actspace.py +++ b/grid2op/gym_compat/multidiscrete_gym_actspace.py @@ -24,7 +24,7 @@ class __AuxMultiDiscreteActSpace: """ - This class allows to convert a grid2op action space into a gym "MultiDiscrete". This means that the action are + This class allows to convert a grid2op action space into a gymnasium "MultiDiscrete". 
This means that the action are labeled, and instead of describing the action itself, you provide only its ID. .. note:: @@ -302,7 +302,7 @@ def __init__(self, self._aux_check_continuous_elements(el, attr_to_keep, nb_bins, act_sp) self._dims = None - self._functs = None # final functions that is applied to the gym action to map it to a grid2Op action + self._functs = None # final functions that is applied to the gymnasium action to map it to a grid2Op action self._binarizers = None # contains all the kwarg to binarize the data self._types = None nvec = self._get_info() @@ -343,7 +343,7 @@ def _aux_check_continuous_elements(self, el, attr_to_keep, nb_bins, act_sp): @staticmethod def _funct_set(vect): - # gym encodes: + # gymnasium encodes: # for set_bus: 0 -> -1, 1-> 0 (don't change)), 2-> 1, 3 -> 2 # for set_status: 0 -> -1, 1-> 0 (don't change)), 2-> 1 [3 do not exist for set_line_status !] vect -= 1 @@ -351,7 +351,7 @@ def _funct_set(vect): @staticmethod def _funct_change(vect): - # gym encodes 0 -> False, 1 -> True + # gymnasium encodes 0 -> False, 1 -> True vect = vect.astype(dt_bool) return vect @@ -550,14 +550,14 @@ def _handle_attribute(self, res, gym_act_this, attr_nm, funct, type_): def from_gym(self, gym_act): """ - This is the function that is called to transform a gym action (in this case a numpy array!) + This is the function that is called to transform a gymnasium action (in this case a numpy array!) sent by the agent and convert it to a grid2op action that will be sent to the underlying grid2op environment. 
Parameters ---------- gym_act: ``numpy.ndarray`` - the gym action + the gymnasium action Returns ------- diff --git a/grid2op/gym_compat/scaler_attr_converter.py b/grid2op/gym_compat/scaler_attr_converter.py index 1484df0d..693c28b9 100644 --- a/grid2op/gym_compat/scaler_attr_converter.py +++ b/grid2op/gym_compat/scaler_attr_converter.py @@ -15,7 +15,7 @@ class __AuxScalerAttrConverter: """ - This is a scaler that transforms a initial gym space `init_space` into its scale version. + This is a scaler that transforms a initial gymnasium space `init_space` into its scale version. It can be use to scale the observation by substracting the mean and dividing by the variance for example. diff --git a/grid2op/gym_compat/utils.py b/grid2op/gym_compat/utils.py index 4374ae4a..9f84c670 100644 --- a/grid2op/gym_compat/utils.py +++ b/grid2op/gym_compat/utils.py @@ -36,6 +36,16 @@ from typing import TypeVar ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") + + +if GYMNASIUM_AVAILABLE: + from gymnasium.spaces import Dict as DictType +elif GYM_AVAILABLE: + from gym.spaces import Dict as DictType +else: + from typing import TypeVar + DictType = TypeVar("Dict") + _MIN_GYM_VERSION = version.parse("0.17.2") # this is the last gym version to use the "old" numpy prng diff --git a/grid2op/tests/test_issue_379.py b/grid2op/tests/test_issue_379.py index 087dd9ec..60d0c8e8 100644 --- a/grid2op/tests/test_issue_379.py +++ b/grid2op/tests/test_issue_379.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: MPL-2.0 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. 
-import grid2op import unittest import warnings @@ -17,7 +16,118 @@ CAN_TEST_ALL = True if GYMNASIUM_AVAILABLE: from gymnasium.utils.env_checker import check_env - from gymnasium.utils.env_checker import check_reset_return_type, check_reset_options, check_reset_seed + from gymnasium.utils.env_checker import check_reset_return_type, check_reset_options + try: + from gymnasium.utils.env_checker import check_reset_seed + except ImportError: + # not present in most recent version of gymnasium, I copy pasted + # it from an oldest version + import gymnasium + from logging import getLogger + import inspect + from copy import deepcopy + import numpy as np + logger = getLogger() + + + def data_equivalence(data_1, data_2) -> bool: + """Assert equality between data 1 and 2, i.e observations, actions, info. + + Args: + data_1: data structure 1 + data_2: data structure 2 + + Returns: + If observation 1 and 2 are equivalent + """ + if type(data_1) == type(data_2): + if isinstance(data_1, dict): + return data_1.keys() == data_2.keys() and all( + data_equivalence(data_1[k], data_2[k]) for k in data_1.keys() + ) + elif isinstance(data_1, (tuple, list)): + return len(data_1) == len(data_2) and all( + data_equivalence(o_1, o_2) for o_1, o_2 in zip(data_1, data_2) + ) + elif isinstance(data_1, np.ndarray): + return data_1.shape == data_2.shape and np.allclose( + data_1, data_2, atol=0.00001 + ) + else: + return data_1 == data_2 + else: + return False + + + def check_reset_seed(env: gymnasium.Env): + """Check that the environment can be reset with a seed. + + Args: + env: The environment to check + + Raises: + AssertionError: The environment cannot be reset with a random seed, + even though `seed` or `kwargs` appear in the signature. 
+ """ + signature = inspect.signature(env.reset) + if "seed" in signature.parameters or ( + "kwargs" in signature.parameters + and signature.parameters["kwargs"].kind is inspect.Parameter.VAR_KEYWORD + ): + try: + obs_1, info = env.reset(seed=123) + assert ( + obs_1 in env.observation_space + ), "The observation returned by `env.reset(seed=123)` is not within the observation space." + assert ( + env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] + is not None + ), "Expects the random number generator to have been generated given a seed was passed to reset. Mostly likely the environment reset function does not call `super().reset(seed=seed)`." + seed_123_rng = deepcopy( + env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] + ) + + obs_2, info = env.reset(seed=123) + assert ( + obs_2 in env.observation_space + ), "The observation returned by `env.reset(seed=123)` is not within the observation space." + if env.spec is not None and env.spec.nondeterministic is False: + assert data_equivalence( + obs_1, obs_2 + ), "Using `env.reset(seed=123)` is non-deterministic as the observations are not equivalent." + assert ( + env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] + == seed_123_rng.bit_generator.state + ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random generates are not same when the same seeds are passed to `env.reset`." + + obs_3, info = env.reset(seed=456) + assert ( + obs_3 in env.observation_space + ), "The observation returned by `env.reset(seed=456)` is not within the observation space." + assert ( + env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] + != seed_123_rng.bit_generator.state + ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random number generators are not different when different seeds are passed to `env.reset`." 
+ + except TypeError as e: + raise AssertionError( + "The environment cannot be reset with a random seed, even though `seed` or `kwargs` appear in the signature. " + f"This should never happen, please report this issue. The error was: {e}" + ) from e + + seed_param = signature.parameters.get("seed") + # Check the default value is None + if seed_param is not None and seed_param.default is not None: + logger.warning( + "The default seed argument in reset should be `None`, otherwise the environment will by default always be deterministic. " + f"Actual default: {seed_param.default}" + ) + else: + raise gymnasium.error.Error( + "The `reset` method does not provide a `seed` or `**kwargs` keyword argument." + ) + + elif GYM_AVAILABLE: from gym.utils.env_checker import check_env from gym.utils.env_checker import check_reset_return_type, check_reset_options, check_reset_seed diff --git a/grid2op/tests/test_new_reset.py b/grid2op/tests/test_new_reset.py index 9977ffb8..a96eac4f 100644 --- a/grid2op/tests/test_new_reset.py +++ b/grid2op/tests/test_new_reset.py @@ -60,23 +60,23 @@ def _aux_obs_equals(self, obs1, obs2): def test_gym_env(self): gym_env = GymEnv(self.env) - # original way - gym_env.init_env.set_id(0) - gym_env.init_env.seed(0) - obs, *_ = gym_env.reset() + # original way (deprecated) + # gym_env.init_env.set_id(0) + # gym_env.init_env.seed(0) + # obs, info = gym_env.reset() # test with seed in reset gym_env.init_env.set_id(0) - obs_seed, *_ = gym_env.reset(seed=0) + obs_seed, info_seed = gym_env.reset(seed=0) # test with ts_id in reset gym_env.init_env.seed(0) - obs_ts, *_ = gym_env.reset(options={"time serie id": 0}) + obs_ts, info_ts = gym_env.reset(options={"time serie id": 0}) # test with both - obs_both, *_ = gym_env.reset(seed=0, options={"time serie id": 0}) + obs_both, info_both = gym_env.reset(seed=0, options={"time serie id": 0}) - self._aux_obs_equals(obs_seed, obs) - self._aux_obs_equals(obs_ts, obs) - self._aux_obs_equals(obs_both, obs) + # 
self._aux_obs_equals(obs_seed, obs) + self._aux_obs_equals(obs_ts, obs_seed) + self._aux_obs_equals(obs_both, obs_seed) \ No newline at end of file