From 1474dad1588a9415493de0d44f87808be6d04944 Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Fri, 1 Mar 2024 16:30:09 +0100
Subject: [PATCH 1/6] improving MDP, action space over [skip ci]

---
 docs/action.rst        |  12 +-
 docs/createbackend.rst |   9 +-
 docs/mdp.rst           | 243 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 253 insertions(+), 11 deletions(-)

diff --git a/docs/action.rst b/docs/action.rst
index a81d0985..98142152 100644
--- a/docs/action.rst
+++ b/docs/action.rst
@@ -459,14 +459,18 @@ package that was formerly "openAI gym").
 
 This includes:
 
-- :class:`grid2op.gym_compat.GymActionSpace` which "represents" actions as a gymnasium `Dict`
-- :class:`grid2op.gym_compat.BoxGymActSpace` which represents actions as gymnasium `Box` 
+- :class:`grid2op.gym_compat.GymActionSpace` which "represents" actions as a gymnasium 
+  `Dict <https://gymnasium.farama.org/api/spaces/composite/#dict>`_
+- :class:`grid2op.gym_compat.BoxGymActSpace` which represents actions as gymnasium 
+  `Box <https://gymnasium.farama.org/api/spaces/fundamental/#box>`_ 
   (actions are numpy arrays). This is especially suited for continuous attributes
   such as redispatching, storage or curtailment.
-- :class:`grid2op.gym_compat.DiscreteActSpace` which represents actions as gymnasium `Discrete`
+- :class:`grid2op.gym_compat.DiscreteActSpace` which represents actions as gymnasium 
+  `Discrete <https://gymnasium.farama.org/api/spaces/fundamental/#discrete>`_
   (actions are integer). This is especially suited for discrete actions such as 
   setting line status or topologies at substation.
-- :class:`grid2op.gym_compat.MultiDiscreteActSpace` which represents actions as gymnasium `Discrete`
+- :class:`grid2op.gym_compat.MultiDiscreteActSpace` which represents actions as gymnasium 
+  `MultiDiscrete <https://gymnasium.farama.org/api/spaces/fundamental/#multidiscrete>`_
   (actions are integer). This is also especially suited for discrete actions such as 
   setting line status or topologies at substation.
 
diff --git a/docs/createbackend.rst b/docs/createbackend.rst
index c343b21a..1490edb6 100644
--- a/docs/createbackend.rst
+++ b/docs/createbackend.rst
@@ -144,9 +144,12 @@ stored in the attribute `self._grid` that can be anything.
     grid2op will only use the information given in the `*_infos()` methods
     (*eg* :func:`grid2op.Backend.Backend.loads_info`) and never by directly accessing `self._grid`
 
-    In other words, `self._grid` can be anything: a PandaPower `Network`, a GridCal `MultiCircuit`,
-    a lightsim2grid `GridModel`, a pypowsybl `Network` (or `SortedNetwork`),
-    a powerfactory `Project` etc. Grid2op will never attempt to access `self._grid`
+    In other words, `self._grid` can be anything: a `PandaPower <https://pandapower.readthedocs.io/en/latest/>`_ `Network`, a 
+    `GridCal <https://gridcal.readthedocs.io/en/latest/>`_ `MultiCircuit`,
+    a `lightsim2grid <https://lightsim2grid.readthedocs.io/>`_ `GridModel`, a 
+    `pypowsybl <https://pypowsybl.readthedocs.io/>`_ `Network` (or `SortedNetwork`),
+    a `powerfactory <https://www.digsilent.de/en/scripting-and-automation.html>`_ `Project` etc. 
+    Grid2op will never attempt to access `self._grid`
 
     (Though, to be perfectly honest, some agents might rely on some type `_grid`, if that's the case, too
     bad for these agents they will need to implement special methods to be compatible with your backend.
diff --git a/docs/mdp.rst b/docs/mdp.rst
index d85a193e..d57dd695 100644
--- a/docs/mdp.rst
+++ b/docs/mdp.rst
@@ -147,6 +147,8 @@ in data in :math:`\mathcal{S}_{\text{im}}^{(\text{out})}`.
   to grid2op some of its internal variables (accessed with the `***_infos()` methods of the backend)
 
 
+TODO do I emphasize that the simulator also contains the grid itself ?
+
 To make a parallel with similar concepts "simulator",
 represents the physics as in all `"mujoco" environments <https://gymnasium.farama.org/environments/mujoco/>`_ 
 *eg* `Ant <https://gymnasium.farama.org/environments/mujoco/ant>`_ or 
@@ -203,7 +205,7 @@ As we said in introduction of this page, we will model a given scenario in grid2
 - a simulator, which is represented as a function :math:`\text{Sim} : \mathcal{S}_{\text{im}}^{(\text{in})} \to \mathcal{S}_{\text{im}}^{(\text{out})}`
 - some time series :math:`\mathcal{X} = \left\{ \mathcal{X}_t \right\}_{1 \leq t \leq T}`
 
-And we need to define the MDP through the definition of :
+In order to define the MDP we need to define:
 
 - :math:`\mathcal{S}`, the "state space"
 - :math:`\mathcal{A}`, the "action space"
@@ -214,6 +216,235 @@ And we need to define the MDP through the definition of :
   is the probability distribution (over :math:`[0, 1]`) that gives
   the reward :math:`r` after taking action :math:`a` in state :math:`s` which lead to state :math:`s'`
 
+We will do that for a single episode (all episodes follow the same process)
+
+Precisions
+~~~~~~~~~~~
+
+To make the reading of this MDP easier, for this section of the documentation, 
+we adopted the following convention:
+
+- text in :green:`green` will refer to elements that are read directly from the grid
+  by the simulator :math:`\text{Sim}` at the creation of the environment.
+- text in :orange:`orange` will refer to elements that are related to time series :math:`\mathcal{X}`
+- text in :blue:`blue` will refer to elements that can be
+  programmatically modified by the user at the creation of the environment.
+
+In the pure definition of the MDP all text in :green:`green`, :orange:`orange` or 
+:blue:`blue` are exogenous and constant: once the episode starts they cannot be changed
+by anything (including the agent).
+
+We differentiate between these 3 types of "variables" only to clarify what can be modified
+by "who":
+
+- :green:`green` variables depend only on the controlled powergrid
+- :orange:`orange` variables depend only on time series
+- :blue:`blue` variables depend only on the way the environment is loaded
+
+.. note::
+  Not all these variables are independent though. If there are for example 3 loads 
+  on the grid, then you need to use time series that somehow can generate
+  3 values at each step for load active values and 3 values at each step for load 
+  reactive values. So the dimension of the :orange:`orange` variables is somehow
+  related to dimension of :green:`green` variables : you cannot use the 
+  time series you want on the grid you want.
+
+Structural information
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+To define mathematically the MDP we need first to define some notations about the grid manipulated in
+this episode.
+
+We suppose that the structure of the grid does not change during the episode, with:
+
+- :green:`n_line` being the number of "powerlines" (and transformers) which are elements that allow the
+  power flows to actually move from one place to another
+- :green:`n_gen` being the number of generators, which are elements that produce the power
+- :green:`n_load` being the number of consumers, which are elements that consume the power (typically a city or a 
+  large industrial plant manufacturing)
+- :green:`n_storage` being the number of storage units on the grid, which are elements that allow to 
+  convert the power into a form of energy that can be stored (*eg* chemical)
+
+All these elements (side of powerlines, generators, loads and storage units) 
+are connected together at so called "substation". The grid counts :green:`n_sub` such substations.
+We will call :green:`dim_topo := 2 \times n_line + n_gen + n_load + n_storage` the total number
+of elements in the grid.
+
+.. note::
+  This "substation" concept only means that if two elements do not belong to the same substation, they cannot
+  be directly connected at the same "node" of the graph. 
+
+  They can be connected in the same "connex component" of the graph (meaning that there are edges that
+  can connect them) but they cannot be part of the same "node"
+
+Each substation can be divided into :blue:`n_busbar_per_sub` busbars (was only `2` in grid2op <= 1.9.8 and can be 
+any integer > 0 in grid2op version >= 1.9.9).
+
+This :blue:`n_busbar_per_sub` parameter gives the maximum number of independent nodes there can be in a given substation.
+So to count the total maximum number of nodes in the grid, you can do 
+:math:`\text{n\_busbar\_per\_sub} \times \text{n\_sub}`
+
+When the grid is loaded, the backend also informs the environment about the :green:`***_to_subid` vectors
+(*eg* :green:`gen_to_subid`)
+which give, for each element, the substation to which it is connected. This is how the "constraint" of substations described in the note above is encoded.
+
+.. note::
+  **Definition**
+
+  With these notations, two elements are connected together if (and only if, that's a 
+  definition after all):
+
+  - they belong to the same substation
+  - they are connected to the same busbar
+
+  In this case, we can also say that these two elements are connected to the same "bus".
+
+  These "buses" are the "nodes" in "the" graph you thought about when looking at a powergrid.
+
+.. note:: 
+  **Definition** ("disconnected bus"): A bus is said to be disconnected if there are no elements connected to it.
+
+.. note:: 
+  **Definition** ("disconnected element"): An element (side of powerlines, generators, loads or storage units) 
+  is said to be disconnected if it is not connected to anything.
+
+Extra references:
++++++++++++++++++
+
+You can modify :blue:`n_busbar_per_sub` in the `grid2op.make` function. For example, 
+by default if you call `grid2op.make("l2rpn_case14_sandbox")` you will have :blue:`n_busbar_per_sub = 2`
+but if you call `grid2op.make("l2rpn_case14_sandbox", n_busbar=3)` you will have
+:blue:`n_busbar_per_sub = 3` see :ref:`substation-mod-el` for more information.
+
+:green:`n_line`, :green:`n_gen`, :green:`n_load`, :green:`n_storage` and :green:`n_sub` depends on the environment
+you loaded when calling `grid2op.make`, for example calling `grid2op.make("l2rpn_case14_sandbox")` 
+will lead to environment
+with :green:`n_line = 20`, :green:`n_gen = 6`, :green:`n_load = 11` and :green:`n_storage = 0`. 
+
+Other information
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+When loading the environment, there are also some other static data that are loaded which includes:
+
+- :green:`min_storage_p` and :green:`max_storage_p`: the minimum and maximum power that can be injected by 
+  each storage unit (typically :green:`min_storage_p` :math:`< 0`). These are vectors 
+  (of real numbers) of size :green:`n_storage`
+- :green:`is_gen_renewable`: a vector of `True` / `False` indicating for each generator whether 
+  it comes from new (and intermittent) renewable energy sources (*eg* solar or wind)
+- :green:`is_gen_controlable`: a vector of `True` / `False` indicating for each generator
+  whether it can be controlled by the agent to produce both more or less power 
+  at any given step. This is usually the case for generators which use
+  coal, gas, nuclear or water (hydro powerplant) as primary energy
+- :green:`min_ramp` and :green:`max_ramp`: are two vectors giving the maximum amount
+  of power by which each generator can be adjusted to produce more / less. Typically,
+  :green:`min_ramp = max_ramp = 0` for non controllable generators.
+
+.. note::
+  These elements are marked :green:`green` because they are loaded by the backend, but strictly speaking
+  they can be specified in other files than the one representing the powergrid.
+
+Action space
+~~~~~~~~~~~~~
+
+At the time of writing, grid2op supports different types of actions:
+
+- :blue:`change_line_status`: that will change the line status (if it is disconnected 
+  this action will attempt to connect it). It lives in :math:`\left\{0,1\right\}^{\text{n\_line}}`
+- :blue:`set_line_status`: that will set the line status to a 
+  particular state regardless of the previous state (+1 to attempt a forced
+  reconnection on the powerline and -1 to attempt a forced disconnection). 
+  There is also a special case where the agent does not want to modify a given line and
+  it can then output "0".
+  It lives in :math:`\left\{-1, 0, 1\right\}^{\text{n\_line}}`
+- \* :blue:`change_bus`: that will, for each element of the grid, change the busbar
+  to which it is connected (*eg* if it was connected on busbar 1 it will attempt to connect it on 
+  busbar 2). This lives in :math:`\left\{0,1\right\}^{\text{dim\_topo}}`
+- :blue:`set_bus`: that will, for each element, control the busbar to which you want to assign it
+  (1, 2, ..., :blue:`n_busbar_per_sub`). Two special cases have been added: -1 means "disconnect" this element
+  and 0 means "I don't want to affect" this element. This part of the action space then lives
+  in :math:`\left\{-1, 0, 1, 2, ..., \text{n\_busbar\_per\_sub} \right\}^{\text{dim\_topo}}`
+- :blue:`storage_p`: for each storage unit, the agent can choose the setpoint / target power.
+  It lives in 
+  :math:`[\text{min\_storage\_p}, \text{max\_storage\_p}] \subset \mathbb{R}^{\text{n\_storage}}`
+- :blue:`curtail`: corresponds to the action where the agent asks a generator (using renewable energy sources)
+  to produce less than what would be possible given the current weather. This type of action can 
+  only be performed on renewable generators. It lives in :math:`[0, 1]^{\text{n\_gen}}` 
+  (to avoid making the notations even more complex, we won't define exactly the space of this 
+  action. Indeed, writing :math:`[0, 1]^{\text{n\_gen}}` is not entirely true as a non renewable generator
+  will not be affected by this type of action)
+- :blue:`redisp`: corresponds to the action where the agent is able to modify (to increase or decrease)
+  the generator output values (asking some producers to produce more and some others
+  to produce less). It lives in :math:`[\text{min\_ramp}, \text{max\_ramp}] \subset \mathbb{R}^{\text{n\_gen}}`
+  (remember that for non controllable generators, by definition we suppose that :green:`min_ramp = max_ramp = 0`)
+
+.. note::
+  The :blue:`change_bus` is only available in environment where :blue:`n_busbar_per_sub = 2`
+  otherwise this would not make sense. The action space does not include this 
+  type of actions if :blue:`n_busbar_per_sub != 2`
+
+You might have noticed that every type of actions is written in :blue:`blue`. This is because
+the action space can be defined at the creation of the environment, by specifying in 
+the call to `grid2op.make` the `action_class` to be used. 
+
+Let's call :math:`1_{\text{change\_line\_status}}` either :math:`\left\{0,1\right\}^{\text{n\_line}}` 
+(corresponding to the definition of the :blue:`change_line_status` briefly described above) if the
+:blue:`change_line_status` has been selected by the user (for the entire scenario) or the
+:math:`\emptyset` otherwise (and we do similarly for all other types of actions of course, for example 
+:math:`1_{\text{redisp}} \in \left\{[\text{min\_ramp}, \text{max\_ramp}], \emptyset\right\}`)
+
+Formally, the action space can then be defined as:
+
+.. math::
+  :nowrap:
+
+  \begin{align*}
+  \mathcal{A}\text{space\_type} =&\left\{\text{change\_line\_status}, \text{set\_line\_status},  \right. \\
+                                 &~\left.\text{change\_bus}, \text{set\_bus}, \right.\\
+                                 &~\left.\text{storage\_p}, \text{curtail}, \text{redisp} \right\} \\
+  \mathcal{A} =&\Pi_{\text{a\_type} \in  \mathcal{A}\text{space\_type} } 1_{\text{a\_type}}\\
+  \end{align*}
+
+.. note::
+  In the grid2op documentation, the words "topological modification" are often used.
+  When that is the case, unless told otherwise it means 
+  :blue:`set_bus` or :blue:`change_bus` type of actions.
+
+
+Extra references:
++++++++++++++++++
+
+Programmatically, the :math:`1_{\text{change\_line\_status}}` can be defined at the 
+call to `grid2op.make` when the environment is created (and cannot be changed afterwards).
+
+For example, if the user builds the environment like this:
+
+.. code-block:: python
+
+  import grid2op
+  from grid2op.Action import PlayableAction
+  env_name = ... # whatever, eg "l2rpn_case14_sandbox"
+  env = grid2op.make(env_name, action_class=PlayableAction)
+
+Then all types of actions are selected and:
+
+.. math::
+  :nowrap:
+
+  \begin{align*}
+  \mathcal{A} =& \left\{0,1\right\}^{\text{n\_line}} \times & \text{change\_line\_status} \\
+               & \left\{-1, 0, 1\right\}^{\text{n\_line}} \times & \text{set\_line\_status} \\
+               & \left\{0,1\right\}^{\text{dim\_topo}} \times & \text{change\_bus} \\
+               & \left\{-1, 0, 1, 2, ..., \text{n\_busbar\_per\_sub} \right\}^{\text{dim\_topo}} \times & \text{set\_bus} \\
+               & ~[\text{min\_storage\_p}, \text{max\_storage\_p}] \times & \text{storage\_p} \\
+               & ~[0, 1]^{\text{n\_gen}} \times & \text{curtail} \\
+               & ~[\text{min\_ramp}, \text{max\_ramp}] & \text{redisp}
+  \end{align*}
+
+State space
+~~~~~~~~~~~~~
+
+
+
 Extensions
 -----------
 
@@ -226,9 +457,13 @@ given to the agent in the observation at time `t` :math:`o_t`.
 
 More specifically, in most grid2op environment (by default at least), none of the 
 physical parameters of the solvers are provided. Also, to represent better
-the daily operation in power systems, only the `t`th row :math:`x_t` of the matrix
-X is given in the observation :math:`o_t`. The components :math:`X_{t', i}` 
-(for :math:`t' > t`) are not given.
+the daily operation in power systems, only the `t` th row of the matrix :math:`\mathcal{X}_t` 
+is given in the observation :math:`o_t`. The components :math:`\mathcal{X}_{t', i}` 
+(for :math:`t' > t`) are not given.
+
+or not partial observability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+TODO remodel the grid2op MDP without the X
 
 Adversarial attacks
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

From b66a6d3894821f47540cc093a54ee5fd8a262e97 Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Mon, 4 Mar 2024 17:29:29 +0100
Subject: [PATCH 2/6] improving on the MDP definition

---
 docs/action.rst                           |   2 +-
 docs/createbackend.rst                    |  34 ++---
 docs/grid_graph.rst                       |   6 +-
 docs/mdp.rst                              | 148 ++++++++++++++++++++--
 docs/observation.rst                      |  67 +---------
 docs/special.rst                          |  70 +++++++++-
 grid2op/Action/baseAction.py              |  16 +--
 grid2op/Action/serializableActionSpace.py |   4 +-
 grid2op/Backend/backend.py                |  10 +-
 grid2op/Backend/pandaPowerBackend.py      |  16 +--
 grid2op/Converter/IdToAct.py              |   2 +-
 grid2op/Observation/baseObservation.py    |  28 ++--
 grid2op/Plot/PlotPlotly.py                |   4 +-
 grid2op/Rules/BaseRules.py                |   2 +-
 grid2op/Space/GridObjects.py              |  28 ++--
 15 files changed, 285 insertions(+), 152 deletions(-)

diff --git a/docs/action.rst b/docs/action.rst
index 98142152..b2c842f5 100644
--- a/docs/action.rst
+++ b/docs/action.rst
@@ -416,7 +416,7 @@ As we explained in the previous paragraph, some action on one end of a powerline
 powerline or disconnect it. This means they modify the bus of **both** the extremity of the powerline.
 
 Here is a table summarizing how the buses are impacted. We denoted by "`PREVIOUS_OR`" the last bus at which
-the origin end of the powerline was connected and "`PREVIOUS_EX`" the last bus at which the extremity end of the
+the origin side of the powerline was connected and "`PREVIOUS_EX`" the last bus at which the extremity side of the
 powerline was connected. Note that for clarity when something is not modified by the action we decided to write on
 the table "not modified" (this entails that after this action, if the powerline is connected then "new origin bus" is
 "`PREVIOUS_OR`" and "new extremity bus" is "`PREVIOUS_EX`"). We remind the reader that "-1" encode for a
diff --git a/docs/createbackend.rst b/docs/createbackend.rst
index 1490edb6..9889c05c 100644
--- a/docs/createbackend.rst
+++ b/docs/createbackend.rst
@@ -259,7 +259,7 @@ Name                       See paragraph   Type         Size       Description
 `line_ex_to_subid`_         :ref:`subid`   vect, int    `n_line`_   For each powerline, it gives the substation id to which its **extremity** end is connected
 `name_load`_                               vect, str    `n_load`_  (optional) name of each load on the grid [if not set, by default it will be "load_$LoadSubID_$LoadID" for example "load_1_10" if the load with id 10 is connected to substation with id 1]
 `name_gen`_                                vect, str    `n_gen`_   (optional) name of each generator on the grid [if not set, by default it will be "gen_$GenSubID_$GenID" for example "gen_2_42" if the generator with id 42 is connected to substation with id 2]
-`name_line`_                               vect, str    `n_line`_  (optional) name of each powerline (and transformers !) on the grid [if not set, by default it will be "$SubOrID_SubExID_LineID" for example "1_4_57" if the powerline with id 57 has its origin end connected to substation with id 1 and its extremity end connected to substation with id 4]
+`name_line`_                               vect, str    `n_line`_  (optional) name of each powerline (and transformers !) on the grid [if not set, by default it will be "$SubOrID_SubExID_LineID" for example "1_4_57" if the powerline with id 57 has its origin side connected to substation with id 1 and its extremity side connected to substation with id 4]
 `name_sub`_                                vect, str    `n_sub`_   (optional) name of each substation on the grid [if not set, by default it will be "sub_$SubID" for example "sub_41" for the substation with id 41]
 `sub_info`_                 :ref:`sub-i`   vect, int    `n_sub`_    (can be automatically set if you don't initialize it) For each substation, it gives the number of elements connected to it ("elements" here denotes: powerline - and transformer- ends, load or generator)
 `dim_topo`_                 :ref:`sub-i`   int          NA          (can be automatically set if you don't initialize it) Total number of elements on the grid ("elements" here denotes: powerline - and transformer- ends, load or generator)
@@ -324,7 +324,7 @@ extremely complex way to say you have to do this:
 Note the number for each element in the substation.
 
 In this example, for substaion with id 0 (bottom left) you decided
-that the powerline with id 0 (connected at this substation at its origin end) will be the "first object of this
+that the powerline with id 0 (connected at this substation at its origin side) will be the "first object of this
 substation". Then the "Load 0" is the second object [remember index a 0 based, so the second object has id 1],
 generator 0 is the third object of this substation (you can know it with the "3" near it) etc.
 
@@ -448,12 +448,12 @@ First, have a look at substation 0:
 
 You know that, at this substation 0 there are `6` elements connected. In this example, these are:
 
-- origin end of Line 0
+- origin side of Line 0
 - Load 0
 - gen 0
-- origin end of line 1
-- origin end of line 2
-- origin end of line 3
+- origin side of line 1
+- origin side of line 2
+- origin side of line 3
 
 Given that, you can fill:
 
@@ -478,12 +478,12 @@ You defined (in a purely arbitrary manner):
 
 So you get:
 
-- first component of `line_or_to_sub_pos` is 0 [because "origin end of line 0" is "element 0" of this substation]
+- first component of `line_or_to_sub_pos` is 0 [because "origin side of line 0" is "element 0" of this substation]
 - first component of `load_to_sub_pos` is 1 [because "load 0" is "element 1" of this substation]
 - first component of `gen_to_sub_pos` is 2 [because "gen 0" is "element 2" of this substation]
-- fourth component of `line_or_to_sub_pos` is 3 [because "origin end of line 3" is "element 3" of this substation]
-- third component of `line_or_to_sub_pos` is 4 [because "origin end of line 2" is "element 4" of this substation]
-- second component of `line_or_to_sub_pos` is 5 [because "origin end of line 1" is "element 5" of this substation]
+- fourth component of `line_or_to_sub_pos` is 3 [because "origin side of line 3" is "element 3" of this substation]
+- third component of `line_or_to_sub_pos` is 4 [because "origin side of line 2" is "element 4" of this substation]
+- second component of `line_or_to_sub_pos` is 5 [because "origin side of line 1" is "element 5" of this substation]
 
 This is showed in the figure below:
 
@@ -583,22 +583,22 @@ At the end, the `apply_action` function of the backend should look something lik
                 ... # the way you do that depends on the `internal representation of the grid`
         lines_or_bus = backendAction.get_lines_or_bus()
         for line_id, new_bus in lines_or_bus:
-            # modify the "busbar" of the origin end of powerline line_id
+            # modify the "busbar" of the origin side of powerline line_id
             if new_bus == -1:
-                # the origin end of powerline is disconnected in the action, disconnect it on your internal representation of the grid
+                # the origin side of powerline is disconnected in the action, disconnect it on your internal representation of the grid
                 ... # the way you do that depends on the `internal representation of the grid`
             else:
-                # the origin end of powerline is moved to either busbar 1 (in this case `new_bus` will be `1`)
+                # the origin side of powerline is moved to either busbar 1 (in this case `new_bus` will be `1`)
                 # or to busbar 2 (in this case `new_bus` will be `2`)
                 ... # the way you do that depends on the `internal representation of the grid`
         lines_ex_bus = backendAction.get_lines_ex_bus()
         for line_id, new_bus in lines_ex_bus:
-            # modify the "busbar" of the extremity end of powerline line_id
+            # modify the "busbar" of the extremity side of powerline line_id
             if new_bus == -1:
-                # the extremity end of powerline is disconnected in the action, disconnect it on your internal representation of the grid
+                # the extremity side of powerline is disconnected in the action, disconnect it on your internal representation of the grid
                 ... # the way you do that depends on the `internal representation of the grid`
             else:
-                # the extremity end of powerline is moved to either busbar 1 (in this case `new_bus` will be `1`)
+                # the extremity side of powerline is moved to either busbar 1 (in this case `new_bus` will be `1`)
                 # or to busbar 2 (in this case `new_bus` will be `2`)
                 ... # the way you do that depends on the `internal representation of the grid`
 
@@ -800,7 +800,7 @@ And you do chat for all substations, giving:
 
 So in this simple example, the first element of the topology vector will represent the origin of powerline 0,
 the second element will represent the load 0, the 7th element (id 6, remember python index are 0 based) represent
-first element of substation 1, so in this case extremity end of powerline 3, the 8th element the generator 1, etc.
+first element of substation 1, so in this case extremity side of powerline 3, the 8th element the generator 1, etc.
 up to element with id 20 whith is the last element of the last substation, in this case extremity of powerline 7.
 
 Once you know the order, the encoding is pretty straightforward:
diff --git a/docs/grid_graph.rst b/docs/grid_graph.rst
index bdeae4c5..c9733b2c 100644
--- a/docs/grid_graph.rst
+++ b/docs/grid_graph.rst
@@ -326,11 +326,11 @@ To know what element of the grid is the "42nd", you can:
    case the extremity side of powerline `line_id`.
 2) look at the table  :attr:`grid2op.Space.GridObjects.grid_objects_types` and especially the line 42 so
    `env.grid_objects_types[42,:]` which contains this information as well. Each column of this table encodes
-   for one type of element (first column is substation, second is load, then generator, then origin end of
-   powerline then extremity end of powerline and finally storage unit. Each will have "-1" if the element
+   for one type of element (first column is substation, second is load, then generator, then origin side of
+   powerline then extremity side of powerline and finally storage unit. Each will have "-1" if the element
    is not of that type, and otherwise and id > 0. Taking the same example as for the above bullet point!
    `env.grid_objects_types[42,:] = [sub_id, -1, -1, -1, line_id, -1]` meaning the "42nd" element of the grid
-   if the extremity end (because it's the 5th column) of id `line_id` (the other element being marked as "-1").
+   if the extremity side (because it's the 5th column) of id `line_id` (the other element being marked as "-1").
 3) refer to the :func:`grid2op.Space.GridObject.topo_vect_element` for an "easier" way to retrieve information
    about this element. 
 
diff --git a/docs/mdp.rst b/docs/mdp.rst
index d57dd695..fef448e0 100644
--- a/docs/mdp.rst
+++ b/docs/mdp.rst
@@ -109,7 +109,7 @@ MDP):
   :nowrap:
 
   \begin{align*}
-      \min_{\pi \in \Pi}  ~& \sum_{t=1}^T \mathbb{E} r_t \\
+      \min_{\pi \in \Pi}  ~& \sum_{t=1}^T \mathbb{E} \left( r_t \right) \\
       \text{s.t.} ~ \\
                      & \forall t, a_t \sim  \pi (s_{t}) & \text{policy produces the action} \\
                      & \forall t, s_{t+1} \sim \mathcal{L}_S(s_t, a_t) & \text{environment produces next state} \\
@@ -431,23 +431,140 @@ Then all type of actions are selected and :
   :nowrap:
 
   \begin{align*}
-  \mathcal{A} =& \left\{0,1\right\}^{\text{n\_line}} \times & \text{change\_line\_status} \\
-               & \left\{-1, 0, 1\right\}^{\text{n\_line}} \times & \text{set\_line\_status} \\
-               & \left\{0,1\right\}^{\text{dim\_topo}} \times & \text{change\_bus} \\
-               & \left\{-1, 0, 1, 2, ..., \text{n\_busbar\_per\_sub} \right\}^{\text{dim\_topo}} \times & \text{set\_bus} \\
-               & ~[\text{min\_storage\_p}, \text{max\_storage\_p}] \times & \text{storage\_p} \\
+  \mathcal{A} =& \left\{0,1\right\}^{\text{n\_line}}~ \times & \text{change\_line\_status} \\
+               & \left\{-1, 0, 1\right\}^{\text{n\_line}}~ \times & \text{set\_line\_status} \\
+               & \left\{0,1\right\}^{\text{dim\_topo}}~ \times & \text{change\_bus} \\
+               & \left\{-1, 0, 1, 2, ..., \text{n\_busbar\_per\_sub} \right\}^{\text{dim\_topo}}~ \times & \text{set\_bus} \\
+               & ~[\text{min\_storage\_p}, \text{max\_storage\_p}]~ \times & \text{storage\_p} \\
                & ~[0, 1]^{\text{n\_gen}} \times & \text{curtail} \\
                & ~[\text{min\_ramp}, \text{max\_ramp}] & \text{redisp}
   \end{align*}
 
+You can also build the same environment like this:
+
+.. code-block:: python
+
+  import grid2op
+  from grid2op.Action import TopologySetAction
+  same_env_name = ... # whatever, eg "l2rpn_case14_sandbox"
+  env = grid2op.make(same_env_name, action_class=TopologySetAction)
+
+Which will lead to the following action space, because the user asks to 
+use only "topological actions" (including line status) with only the
+"set" way of modifying them.
+
+.. math::
+  :nowrap:
+
+  \begin{align*}
+  \mathcal{A} =& \left\{-1, 0, 1\right\}^{\text{n\_line}}~ \times & \text{set\_line\_status} \\
+               & \left\{-1, 0, 1, 2, ..., \text{n\_busbar\_per\_sub} \right\}^{\text{dim\_topo}}~ & \text{set\_bus} \\
+  \end{align*}
+
+The page :ref:`action-module` of the documentation provides you with all types of
+actions you can use in grid2op.
+
+.. note::
+  If you use a compatibility with the popular gymnasium (previously gym)
+  you can also specify the action space with the "`attr_to_keep`"
+  key-word argument.
+
+.. _mdp-state-space-def:
+
 State space
 ~~~~~~~~~~~~~
 
+By default in grid2op, what is shown to the agent is the so called 
+"observation". In this part of the documentation, we will describe something
+slightly different which is the "state space" of the MDP.
+
+The main difference is that this "state space" will include future data about the 
+environment (*eg* the :math:`\mathcal{X}` matrix). You can refer to 
+section :ref:`pomdp` or :ref:`non-pomdp` of this page of the documentation.
+
+.. note::
+  We found it easier to show the MDP without the introduction of the
+  "observation kernel", so keep in mind that this paragraph is not
+  representative of the observation in grid2op but is "purely
+  theoretical".
+
+The state space is defined by different types of attributes and we will not list
+them all here (you can find a detailed list of everything available to the 
+agent in the :ref:`observation_module` page of the documentation). The
+"state space" is then made of:
+
+- some part of the outcome of the solver: 
+  :math:`S_{\text{grid}} \subset \mathcal{S}_{\text{im}}^{(\text{out})}`, this 
+  includes but is not limited to the loads active values `load_p`_, 
+  loads reactive values `load_q`_, voltage magnitude 
+  at each loads `load_v`_, the same kind of attributes but for generators
+  `gen_p`_, `gen_q`_, `gen_v`_, `gen_theta`_  and also for powerlines 
+  `p_or`_, `q_or`_, `v_or`_, `a_or`_, `theta_or`_, `p_ex`_, `q_ex`_, `v_ex`_, 
+  `a_ex`_, `theta_ex`_, `rho`_ etc.
+- some attributes related to "redispatching" (which is a type of action) that are
+  computed by the environment (see :ref:`mdp-transition-kernel-def` for more information)
+  which include `target_dispatch`_ and `actual_dispatch`_ or the curtailment
+  `gen_p_before_curtail`_, `curtailment_mw`_, `curtailment`_ or `curtailment_limit`_ 
+- some attributes related to "storage units", for example `storage_charge`_ , 
+  `storage_power_target`_, `storage_power`_ or `storage_theta`_  
+- some related to "date" and "time", `year`_, `month`_, `day`_, `hour_of_day`_, 
+  `minute_of_hour`_, `day_of_week`_, `current_step`_, `max_step`_, `delta_time`_  
+- finally some related to the :blue:`rules of the game` like 
+  `timestep_overflow`_, `time_before_cooldown_line`_ or `time_before_cooldown_sub`_
+
+And, to make it "Markovian" we also need to include :
+
+- the (constant) values of :math:`\mathcal{S}_{\text{im}}^{(\text{in})}` that 
+  are not "part of" :math:`\mathcal{X}`. This might include some physical
+  parameters of some elements of the grid (like transformers or powerlines) or
+  some other parameters of the solver controlling either the equations to be 
+  solved or the solver to use etc. \*
+- the complete matrix :math:`\mathcal{X}` which includes the exact knowledge of 
+  past, present **and future** loads and generation for the entire scenario (which 
+  is not possible in practice). The matrix itself is constant.
+- the index representing at which "step" of the matrix :math:`\mathcal{X}` the 
+  current data are being used by the environment.
+
+.. note::
+  \* grid2op is built to be "simulator agnostic" so all this part of the "state space"
+  is not easily accessible through the grid2op API. To access (or to modify) them
+  you need to be aware of the implementation of the :class:`grid2op.Backend.Backend`
+  you are using.
+
+.. _mdp-transition-kernel-def:
+
+Transition Kernel
+~~~~~~~~~~~~~~~~~~~
+
+TODO 
 
+Reward Kernel
+~~~~~~~~~~~~~~~~~~~
+
+And to finish this (rather long) description of grid2op's MDP we need to mention the
+"reward kernel".
+
+This "kernel" computes the reward associated to taking the action :math:`a` in state
+:math:`s` that leads to state :math:`s'`. In most cases, the 
+reward in grid2op is a deterministic function and depends only on the grid state.
+
+In grid2op, every environment comes with a pre-defined :blue:`reward function` that
+can be fully customized by the user when the environment is created or
+even afterwards (but is still constant during an entire episode of course).
+
+For more information, you might want to have a look at the :ref:`reward-module` page
+of this documentation.
 
 Extensions
 -----------
 
+TODO: this part of the section is still an ongoing work.
+
+Let us know if you want to contribute !
+
+
+.. _pomdp:
+
 Partial Observatibility
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -461,8 +578,11 @@ the daily operation in power systems, only the `t` th row of the matrix :math:`\
 is given in the observation :math:`o_t`. The components :math:`\mathcal{X}_{t', i}` 
 (for :math:`\forall t' > t`) are not given.
 
-or not partial observatibility
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. _non-pomdp:
+
+Or not partial observability ?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 TODO remodel the grid2op MDP without the X
 
 Adversarial attacks
@@ -479,6 +599,9 @@ includes a model of the world that can be different from the grid of the environ
 Simulator dynamics can be more complex
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+TODO: the Backend does not need to "exactly map the simulator"; there are 
+some examples below:
+
 Hide elements from the grid2op environment
 ++++++++++++++++++++++++++++++++++++++++++
 
@@ -504,11 +627,20 @@ accurate description of the grid and only "subsample"
 (*eg* at a frequency of every 5 mins) provide grid2op
 with some information.
 
+Handle the topology differently
+++++++++++++++++++++++++++++++++++
+
+The Backend can operate switches: the only requirement from grid2op is to map the topology
+to switches.
 
 Some constraints
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 TODO
 
+Operator attention: alarm and alert
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TODO
 
 .. include:: final.rst
diff --git a/docs/observation.rst b/docs/observation.rst
index 86bc3bab..97a88110 100644
--- a/docs/observation.rst
+++ b/docs/observation.rst
@@ -1,70 +1,7 @@
 .. currentmodule:: grid2op.Observation
 
-.. _n_gen: ./space.html#grid2op.Space.GridObjects.n_gen
-.. _n_load: ./space.html#grid2op.Space.GridObjects.n_load
-.. _n_line: ./space.html#grid2op.Space.GridObjects.n_line
-.. _n_sub: ./space.html#grid2op.Space.GridObjects.n_sub
-.. _n_storage: ./space.html#grid2op.Space.GridObjects.n_storage
-.. _dim_topo: ./space.html#grid2op.Space.GridObjects.dim_topo
-.. _dim_alarms: ./space.html#grid2op.Space.GridObjects.dim_alarms
-.. _dim_alerts: ./space.html#grid2op.Space.GridObjects.dim_alerts
-.. _year: ./observation.html#grid2op.Observation.BaseObservation.year
-.. _month: ./observation.html#grid2op.Observation.BaseObservation.month
-.. _day: ./observation.html#grid2op.Observation.BaseObservation.day
-.. _hour_of_day: ./observation.html#grid2op.Observation.BaseObservation.hour_of_day
-.. _minute_of_hour: ./observation.html#grid2op.Observation.BaseObservation.minute_of_hour
-.. _day_of_week: ./observation.html#grid2op.Observation.BaseObservation.day_of_week
-.. _gen_p: ./observation.html#grid2op.Observation.BaseObservation.gen_p
-.. _gen_q: ./observation.html#grid2op.Observation.BaseObservation.gen_q
-.. _gen_v: ./observation.html#grid2op.Observation.BaseObservation.gen_v
-.. _load_p: ./observation.html#grid2op.Observation.BaseObservation.load_p
-.. _load_q: ./observation.html#grid2op.Observation.BaseObservation.load_q
-.. _load_v: ./observation.html#grid2op.Observation.BaseObservation.load_v
-.. _p_or: ./observation.html#grid2op.Observation.BaseObservation.p_or
-.. _q_or: ./observation.html#grid2op.Observation.BaseObservation.q_or
-.. _v_or: ./observation.html#grid2op.Observation.BaseObservation.v_or
-.. _a_or: ./observation.html#grid2op.Observation.BaseObservation.a_or
-.. _p_ex: ./observation.html#grid2op.Observation.BaseObservation.p_ex
-.. _q_ex: ./observation.html#grid2op.Observation.BaseObservation.q_ex
-.. _v_ex: ./observation.html#grid2op.Observation.BaseObservation.v_ex
-.. _a_ex: ./observation.html#grid2op.Observation.BaseObservation.a_ex
-.. _rho: ./observation.html#grid2op.Observation.BaseObservation.rho
-.. _topo_vect: ./observation.html#grid2op.Observation.BaseObservation.topo_vect
-.. _line_status: ./observation.html#grid2op.Observation.BaseObservation.line_status
-.. _timestep_overflow: ./observation.html#grid2op.Observation.BaseObservation.timestep_overflow
-.. _time_before_cooldown_line: ./observation.html#grid2op.Observation.BaseObservation.time_before_cooldown_line
-.. _time_before_cooldown_sub: ./observation.html#grid2op.Observation.BaseObservation.time_before_cooldown_sub
-.. _time_next_maintenance: ./observation.html#grid2op.Observation.BaseObservation.time_next_maintenance
-.. _duration_next_maintenance: ./observation.html#grid2op.Observation.BaseObservation.duration_next_maintenance
-.. _target_dispatch: ./observation.html#grid2op.Observation.BaseObservation.target_dispatch
-.. _actual_dispatch: ./observation.html#grid2op.Observation.BaseObservation.actual_dispatch
-.. _storage_charge: ./observation.html#grid2op.Observation.BaseObservation.storage_charge
-.. _storage_power_target: ./observation.html#grid2op.Observation.BaseObservation.storage_power_target
-.. _storage_power: ./observation.html#grid2op.Observation.BaseObservation.storage_power
-.. _gen_p_before_curtail: ./observation.html#grid2op.Observation.BaseObservation.gen_p_before_curtail
-.. _curtailment: ./observation.html#grid2op.Observation.BaseObservation.curtailment
-.. _curtailment_limit: ./observation.html#grid2op.Observation.BaseObservation.curtailment_limit
-.. _is_alarm_illegal: ./observation.html#grid2op.Observation.BaseObservation.is_alarm_illegal
-.. _time_since_last_alarm: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_alarm
-.. _last_alarm: ./observation.html#grid2op.Observation.BaseObservation.last_alarm
-.. _attention_budget: ./observation.html#grid2op.Observation.BaseObservation.attention_budget
-.. _max_step: ./observation.html#grid2op.Observation.BaseObservation.max_step
-.. _current_step: ./observation.html#grid2op.Observation.BaseObservation.current_step
-.. _delta_time: ./observation.html#grid2op.Observation.BaseObservation.delta_time
-.. _gen_margin_up: ./observation.html#grid2op.Observation.BaseObservation.gen_margin_up
-.. _gen_margin_down: ./observation.html#grid2op.Observation.BaseObservation.gen_margin_down
-.. _curtailment_mw: ./observation.html#grid2op.Observation.BaseObservation.curtailment_mw
-.. _theta_or: ./observation.html#grid2op.Observation.BaseObservation.theta_or
-.. _theta_ex: ./observation.html#grid2op.Observation.BaseObservation.theta_ex
-.. _gen_theta: ./observation.html#grid2op.Observation.BaseObservation.gen_theta
-.. _load_theta: ./observation.html#grid2op.Observation.BaseObservation.load_theta
-.. _active_alert: ./observation.html#grid2op.Observation.BaseObservation.active_alert
-.. _time_since_last_alert: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_alert
-.. _alert_duration: ./observation.html#grid2op.Observation.BaseObservation.alert_duration
-.. _total_number_of_alert: ./observation.html#grid2op.Observation.BaseObservation.total_number_of_alert
-.. _time_since_last_attack: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_attack
-.. _was_alert_used_after_attack: ./observation.html#grid2op.Observation.BaseObservation.was_alert_used_after_attack
-.. _attack_under_alert: ./observation.html#grid2op.Observation.BaseObservation.attack_under_alert
+.. include:: special.rst  
+.. include the observation attributes
 
 .. _observation_module:
 
diff --git a/docs/special.rst b/docs/special.rst
index 14223517..44bcdfb8 100644
--- a/docs/special.rst
+++ b/docs/special.rst
@@ -1,7 +1,6 @@
 .. Color profiles for Sphinx.
 .. Has to be used with hacks.css
 .. (https://bitbucket.org/lbesson/web-sphinx/src/master/.static/hacks.css)
-
 .. role:: black
 .. role:: gray
 .. role:: grey
@@ -39,6 +38,71 @@
 .. role:: center
 .. role:: left
 .. role:: right
-
-
 .. (c) Lilian Besson, 2011-2016, https://bitbucket.org/lbesson/web-sphinx/
+
+.. _n_gen: ./space.html#grid2op.Space.GridObjects.n_gen
+.. _n_load: ./space.html#grid2op.Space.GridObjects.n_load
+.. _n_line: ./space.html#grid2op.Space.GridObjects.n_line
+.. _n_sub: ./space.html#grid2op.Space.GridObjects.n_sub
+.. _n_storage: ./space.html#grid2op.Space.GridObjects.n_storage
+.. _dim_topo: ./space.html#grid2op.Space.GridObjects.dim_topo
+.. _dim_alarms: ./space.html#grid2op.Space.GridObjects.dim_alarms
+.. _dim_alerts: ./space.html#grid2op.Space.GridObjects.dim_alerts
+.. _year: ./observation.html#grid2op.Observation.BaseObservation.year
+.. _month: ./observation.html#grid2op.Observation.BaseObservation.month
+.. _day: ./observation.html#grid2op.Observation.BaseObservation.day
+.. _hour_of_day: ./observation.html#grid2op.Observation.BaseObservation.hour_of_day
+.. _minute_of_hour: ./observation.html#grid2op.Observation.BaseObservation.minute_of_hour
+.. _day_of_week: ./observation.html#grid2op.Observation.BaseObservation.day_of_week
+.. _gen_p: ./observation.html#grid2op.Observation.BaseObservation.gen_p
+.. _gen_q: ./observation.html#grid2op.Observation.BaseObservation.gen_q
+.. _gen_v: ./observation.html#grid2op.Observation.BaseObservation.gen_v
+.. _load_p: ./observation.html#grid2op.Observation.BaseObservation.load_p
+.. _load_q: ./observation.html#grid2op.Observation.BaseObservation.load_q
+.. _load_v: ./observation.html#grid2op.Observation.BaseObservation.load_v
+.. _p_or: ./observation.html#grid2op.Observation.BaseObservation.p_or
+.. _q_or: ./observation.html#grid2op.Observation.BaseObservation.q_or
+.. _v_or: ./observation.html#grid2op.Observation.BaseObservation.v_or
+.. _a_or: ./observation.html#grid2op.Observation.BaseObservation.a_or
+.. _p_ex: ./observation.html#grid2op.Observation.BaseObservation.p_ex
+.. _q_ex: ./observation.html#grid2op.Observation.BaseObservation.q_ex
+.. _v_ex: ./observation.html#grid2op.Observation.BaseObservation.v_ex
+.. _a_ex: ./observation.html#grid2op.Observation.BaseObservation.a_ex
+.. _rho: ./observation.html#grid2op.Observation.BaseObservation.rho
+.. _topo_vect: ./observation.html#grid2op.Observation.BaseObservation.topo_vect
+.. _line_status: ./observation.html#grid2op.Observation.BaseObservation.line_status
+.. _timestep_overflow: ./observation.html#grid2op.Observation.BaseObservation.timestep_overflow
+.. _time_before_cooldown_line: ./observation.html#grid2op.Observation.BaseObservation.time_before_cooldown_line
+.. _time_before_cooldown_sub: ./observation.html#grid2op.Observation.BaseObservation.time_before_cooldown_sub
+.. _time_next_maintenance: ./observation.html#grid2op.Observation.BaseObservation.time_next_maintenance
+.. _duration_next_maintenance: ./observation.html#grid2op.Observation.BaseObservation.duration_next_maintenance
+.. _target_dispatch: ./observation.html#grid2op.Observation.BaseObservation.target_dispatch
+.. _actual_dispatch: ./observation.html#grid2op.Observation.BaseObservation.actual_dispatch
+.. _storage_charge: ./observation.html#grid2op.Observation.BaseObservation.storage_charge
+.. _storage_power_target: ./observation.html#grid2op.Observation.BaseObservation.storage_power_target
+.. _storage_power: ./observation.html#grid2op.Observation.BaseObservation.storage_power
+.. _storage_theta: ./observation.html#grid2op.Observation.BaseObservation.storage_theta
+.. _gen_p_before_curtail: ./observation.html#grid2op.Observation.BaseObservation.gen_p_before_curtail
+.. _curtailment: ./observation.html#grid2op.Observation.BaseObservation.curtailment
+.. _curtailment_limit: ./observation.html#grid2op.Observation.BaseObservation.curtailment_limit
+.. _is_alarm_illegal: ./observation.html#grid2op.Observation.BaseObservation.is_alarm_illegal
+.. _time_since_last_alarm: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_alarm
+.. _last_alarm: ./observation.html#grid2op.Observation.BaseObservation.last_alarm
+.. _attention_budget: ./observation.html#grid2op.Observation.BaseObservation.attention_budget
+.. _max_step: ./observation.html#grid2op.Observation.BaseObservation.max_step
+.. _current_step: ./observation.html#grid2op.Observation.BaseObservation.current_step
+.. _delta_time: ./observation.html#grid2op.Observation.BaseObservation.delta_time
+.. _gen_margin_up: ./observation.html#grid2op.Observation.BaseObservation.gen_margin_up
+.. _gen_margin_down: ./observation.html#grid2op.Observation.BaseObservation.gen_margin_down
+.. _curtailment_mw: ./observation.html#grid2op.Observation.BaseObservation.curtailment_mw
+.. _theta_or: ./observation.html#grid2op.Observation.BaseObservation.theta_or
+.. _theta_ex: ./observation.html#grid2op.Observation.BaseObservation.theta_ex
+.. _gen_theta: ./observation.html#grid2op.Observation.BaseObservation.gen_theta
+.. _load_theta: ./observation.html#grid2op.Observation.BaseObservation.load_theta
+.. _active_alert: ./observation.html#grid2op.Observation.BaseObservation.active_alert
+.. _time_since_last_alert: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_alert
+.. _alert_duration: ./observation.html#grid2op.Observation.BaseObservation.alert_duration
+.. _total_number_of_alert: ./observation.html#grid2op.Observation.BaseObservation.total_number_of_alert
+.. _time_since_last_attack: ./observation.html#grid2op.Observation.BaseObservation.time_since_last_attack
+.. _was_alert_used_after_attack: ./observation.html#grid2op.Observation.BaseObservation.was_alert_used_after_attack
+.. _attack_under_alert: ./observation.html#grid2op.Observation.BaseObservation.attack_under_alert
diff --git a/grid2op/Action/baseAction.py b/grid2op/Action/baseAction.py
index 2f6bffc2..c9b705a9 100644
--- a/grid2op/Action/baseAction.py
+++ b/grid2op/Action/baseAction.py
@@ -2264,8 +2264,8 @@ def update(self, dict_):
             # there is a shortcut to do that:
             disconnect_powerline2 = env.disconnect_powerline(line_id=1)
 
-        *Example 3*: force the reconnection of the powerline of id 5 by connected it to bus 1 on its origin end and
-        bus 2 on its extremity end.
+        *Example 3*: force the reconnection of the powerline of id 5 by connecting it to bus 1 on its origin side and
+        bus 2 on its extremity side.
 
         .. code-block:: python
 
@@ -2688,14 +2688,14 @@ def _check_for_ambiguity(self):
             disco_or = self._set_topo_vect[cls.line_or_pos_topo_vect] == -1
             if (self._set_topo_vect[cls.line_ex_pos_topo_vect][disco_or] > 0).any():
                 raise InvalidLineStatus(
-                    "A powerline is connected (set to a bus at extremity end) and "
-                    "disconnected (set to bus -1 at origin end)"
+                    "A powerline is connected (set to a bus at extremity side) and "
+                    "disconnected (set to bus -1 at origin side)"
                 )
             disco_ex = self._set_topo_vect[cls.line_ex_pos_topo_vect] == -1
             if (self._set_topo_vect[cls.line_or_pos_topo_vect][disco_ex] > 0).any():
                 raise InvalidLineStatus(
-                    "A powerline is connected (set to a bus at origin end) and "
-                    "disconnected (set to bus -1 at extremity end)"
+                    "A powerline is connected (set to a bus at origin side) and "
+                    "disconnected (set to bus -1 at extremity side)"
                 )
 
         # if i disconnected of a line, but i modify also the bus where it's connected
@@ -3704,8 +3704,8 @@ def effect_on(
 
             - if a powerline is inspected then the keys are:
 
-                - "change_bus_or": whether or not the origin end will be moved from one bus to another
-                - "change_bus_ex": whether or not the extremity end will be moved from one bus to another
+                - "change_bus_or": whether or not the origin side will be moved from one bus to another
+                - "change_bus_ex": whether or not the extremity side will be moved from one bus to another
                 - "set_bus_or": the new bus where the origin will be moved
                 - "set_bus_ex": the new bus where the extremity will be moved
                 - "set_line_status": the new status of the power line
diff --git a/grid2op/Action/serializableActionSpace.py b/grid2op/Action/serializableActionSpace.py
index f163da11..723da752 100644
--- a/grid2op/Action/serializableActionSpace.py
+++ b/grid2op/Action/serializableActionSpace.py
@@ -474,10 +474,10 @@ def reconnect_powerline(
             The powerline to be disconnected.
 
         bus_or: ``int``
-            On which bus to reconnect the powerline at its origin end
+            On which bus to reconnect the powerline at its origin side
 
         bus_ex: ``int``
-            On which bus to reconnect the powerline at its extremity end
+            On which bus to reconnect the powerline at its extremity side
         previous_action
 
         Returns
diff --git a/grid2op/Backend/backend.py b/grid2op/Backend/backend.py
index 976c79f9..db8a6cf0 100644
--- a/grid2op/Backend/backend.py
+++ b/grid2op/Backend/backend.py
@@ -493,7 +493,7 @@ def lines_or_info(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
         .. note::
             It is called after the solver has been ran, only in case of success (convergence).
             
-        It returns the information extracted from the _grid at the origin end of each powerline.
+        It returns the information extracted from the _grid at the origin side of each powerline.
 
         For assumption about the order of the powerline flows return in this vector, see the help of the
         :func:`Backend.get_line_status` method.
@@ -526,7 +526,7 @@ def lines_ex_info(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
         .. note::
             It is called after the solver has been ran, only in case of success (convergence).
             
-        It returns the information extracted from the _grid at the extremity end of each powerline.
+        It returns the information extracted from the _grid at the extremity side of each powerline.
 
         For assumption about the order of the powerline flows return in this vector, see the help of the
         :func:`Backend.get_line_status` method.
@@ -683,10 +683,10 @@ def get_line_flow(self) -> np.ndarray:
             It is called after the solver has been ran, only in case of success (convergence).
             
         If the AC mod is used, this shall return the current flow on the end of the powerline where there is a protection.
-        For example, if there is a protection on "origin end" of powerline "l2" then this method shall return the current
-        flow of at the "origin end" of powerline l2.
+        For example, if there is a protection on "origin side" of powerline "l2" then this method shall return the current
+        flow at the "origin side" of powerline l2.
 
-        Note that in general, there is no loss of generality in supposing all protections are set on the "origin end" of
+        Note that in general, there is no loss of generality in supposing all protections are set on the "origin side" of
         the powerline. So this method will return all origin line flows.
         It is also possible, for a specific application, to return the maximum current flow between both ends of a power
         _grid for more complex scenario.
diff --git a/grid2op/Backend/pandaPowerBackend.py b/grid2op/Backend/pandaPowerBackend.py
index 1ba80b16..0cb000c3 100644
--- a/grid2op/Backend/pandaPowerBackend.py
+++ b/grid2op/Backend/pandaPowerBackend.py
@@ -64,31 +64,31 @@ class PandaPowerBackend(Backend):
         The ratio that allow the conversion from pair-unit to kv for the loads
 
     lines_or_pu_to_kv: :class:`numpy.array`, dtype:float
-        The ratio that allow the conversion from pair-unit to kv for the origin end of the powerlines
+        The ratio that allows the conversion from per-unit to kv for the origin side of the powerlines
 
     lines_ex_pu_to_kv: :class:`numpy.array`, dtype:float
-        The ratio that allow the conversion from pair-unit to kv for the extremity end of the powerlines
+        The ratio that allows the conversion from per-unit to kv for the extremity side of the powerlines
 
     p_or: :class:`numpy.array`, dtype:float
-        The active power flowing at the origin end of each powerline
+        The active power flowing at the origin side of each powerline
 
     q_or: :class:`numpy.array`, dtype:float
-        The reactive power flowing at the origin end of each powerline
+        The reactive power flowing at the origin side of each powerline
 
     v_or: :class:`numpy.array`, dtype:float
         The voltage magnitude at the origin bus of the powerline
 
     a_or: :class:`numpy.array`, dtype:float
-        The current flowing at the origin end of each powerline
+        The current flowing at the origin side of each powerline
 
     p_ex: :class:`numpy.array`, dtype:float
-        The active power flowing at the extremity end of each powerline
+        The active power flowing at the extremity side of each powerline
 
     q_ex: :class:`numpy.array`, dtype:float
-        The reactive power flowing at the extremity end of each powerline
+        The reactive power flowing at the extremity side of each powerline
 
     a_ex: :class:`numpy.array`, dtype:float
-        The current flowing at the extremity end of each powerline
+        The current flowing at the extremity side of each powerline
 
     v_ex: :class:`numpy.array`, dtype:float
         The voltage magnitude at the extremity bus of the powerline
diff --git a/grid2op/Converter/IdToAct.py b/grid2op/Converter/IdToAct.py
index c1ffd241..be96e992 100644
--- a/grid2op/Converter/IdToAct.py
+++ b/grid2op/Converter/IdToAct.py
@@ -26,7 +26,7 @@ class IdToAct(Converter):
     A "unary action" is an action that consists only in acting on one "concept" it includes:
 
     - disconnecting a single powerline
-    - reconnecting a single powerline and connect it to bus xxx on its origin end and yyy on its extremity end
+    - reconnecting a single powerline and connect it to bus xxx on its origin side and yyy on its extremity side
     - changing the topology of a single substation
     - performing redispatching on a single generator
     - performing curtailment on a single generator
diff --git a/grid2op/Observation/baseObservation.py b/grid2op/Observation/baseObservation.py
index 41beb42b..1a403a9c 100644
--- a/grid2op/Observation/baseObservation.py
+++ b/grid2op/Observation/baseObservation.py
@@ -112,38 +112,38 @@ class BaseObservation(GridObjects):
         voltage angles (see :attr:`BaseObservation.support_theta`).
 
     p_or: :class:`numpy.ndarray`, dtype:float
-        The active power flow at the origin end of each powerline (expressed in MW).
+        The active power flow at the origin side of each powerline (expressed in MW).
 
     q_or: :class:`numpy.ndarray`, dtype:float
-        The reactive power flow at the origin end of each powerline (expressed in MVar).
+        The reactive power flow at the origin side of each powerline (expressed in MVar).
 
     v_or: :class:`numpy.ndarray`, dtype:float
-        The voltage magnitude at the bus to which the origin end of each powerline is connected (expressed in kV).
+        The voltage magnitude at the bus to which the origin side of each powerline is connected (expressed in kV).
 
     theta_or: :class:`numpy.ndarray`, dtype:float
-        The voltage angle at the bus to which the origin end of each powerline
+        The voltage angle at the bus to which the origin side of each powerline
         is connected (expressed in degree). Only availble if the backend supports the retrieval of
         voltage angles (see :attr:`BaseObservation.support_theta`).
 
     a_or: :class:`numpy.ndarray`, dtype:float
-        The current flow at the origin end of each powerline (expressed in A).
+        The current flow at the origin side of each powerline (expressed in A).
 
     p_ex: :class:`numpy.ndarray`, dtype:float
-        The active power flow at the extremity end of each powerline (expressed in MW).
+        The active power flow at the extremity side of each powerline (expressed in MW).
 
     q_ex: :class:`numpy.ndarray`, dtype:float
-        The reactive power flow at the extremity end of each powerline (expressed in MVar).
+        The reactive power flow at the extremity side of each powerline (expressed in MVar).
 
     v_ex: :class:`numpy.ndarray`, dtype:float
-        The voltage magnitude at the bus to which the extremity end of each powerline is connected (expressed in kV).
+        The voltage magnitude at the bus to which the extremity side of each powerline is connected (expressed in kV).
 
     theta_ex: :class:`numpy.ndarray`, dtype:float
-        The voltage angle at the bus to which the extremity end of each powerline
+        The voltage angle at the bus to which the extremity side of each powerline
         is connected (expressed in degree). Only availble if the backend supports the retrieval of
         voltage angles (see :attr:`BaseObservation.support_theta`).
 
     a_ex: :class:`numpy.ndarray`, dtype:float
-        The current flow at the extremity end of each powerline (expressed in A).
+        The current flow at the extremity side of each powerline (expressed in A).
 
     rho: :class:`numpy.ndarray`, dtype:float
         The capacity of each powerline. It is defined at the observed current flow divided by the thermal limit of each
@@ -1863,9 +1863,9 @@ def bus_connectivity_matrix(self,
         optional: 
         
         - `lor_bus` : for each powerline, it gives the id (row / column of the matrix) 
-          of the bus of the matrix to which its origin end is connected         
+          of the bus of the matrix to which its origin side is connected         
         - `lex_bus` : for each powerline, it gives the id (row / column of the matrix) 
-          of the bus of the matrix to which its extremity end is connected         
+          of the bus of the matrix to which its extremity side is connected         
 
         Notes
         ------
@@ -3377,7 +3377,7 @@ def copy(self) -> Self:
     @property
     def line_or_bus(self) -> np.ndarray:
         """
-        Retrieve the busbar at which each origin end of powerline is connected.
+        Retrieve the busbar at which each origin side of powerline is connected.
 
         The result follow grid2op convention:
 
@@ -3399,7 +3399,7 @@ def line_or_bus(self) -> np.ndarray:
     @property
     def line_ex_bus(self) -> np.ndarray:
         """
-        Retrieve the busbar at which each extremity end of powerline is connected.
+        Retrieve the busbar at which each extremity side of powerline is connected.
 
         The result follow grid2op convention:
 
diff --git a/grid2op/Plot/PlotPlotly.py b/grid2op/Plot/PlotPlotly.py
index 14d5419d..aae742f3 100644
--- a/grid2op/Plot/PlotPlotly.py
+++ b/grid2op/Plot/PlotPlotly.py
@@ -143,10 +143,10 @@ def draw_line(pos_sub_or, pos_sub_ex, rho, color_palette, status, line_color="gr
     Parameters
     ----------
     pos_sub_or: ``tuple``
-        Position (x,y) of the origin end of the powerline
+        Position (x,y) of the origin side of the powerline
 
     pos_sub_ex: ``tuple``
-        Position (x,y) of the extremity end of the powerline
+        Position (x,y) of the extremity side of the powerline
 
     rho: ``float``
         Line capacity usage
diff --git a/grid2op/Rules/BaseRules.py b/grid2op/Rules/BaseRules.py
index f6d6b1a4..b822f0f3 100644
--- a/grid2op/Rules/BaseRules.py
+++ b/grid2op/Rules/BaseRules.py
@@ -38,7 +38,7 @@ def __call__(self, action, env):
         As opposed to "ambiguous action", "illegal action" are not illegal per se.
         They are legal or not on a certain environment. For example, disconnecting
         a powerline that has been cut off for maintenance is illegal. Saying to action to both disconnect a
-        powerline and assign it to bus 2 on it's origin end is ambiguous, and not tolerated in Grid2Op.
+        powerline and assign it to bus 2 on its origin side is ambiguous, and not tolerated in Grid2Op.
 
         Parameters
         ----------
diff --git a/grid2op/Space/GridObjects.py b/grid2op/Space/GridObjects.py
index 7c201961..948533ef 100644
--- a/grid2op/Space/GridObjects.py
+++ b/grid2op/Space/GridObjects.py
@@ -113,7 +113,7 @@ class GridObjects:
               "local topology" of the substation 4 by looking at
               :attr:`grid2op.Observation.BaseObservation.topo_vect` [42:47].
           iii) retrieve which component of this vector of dimension 5 (remember we assumed substation 4 had 5 elements)
-               encodes information about the origin end of the line with id `l_id`. This information is given in
+               encodes information about the origin side of the line with id `l_id`. This information is given in
                :attr:`GridObjects.line_or_to_sub_pos` [l_id]. This is a number between 0 and 4, say it's 3. 3 being
                the index of the object in the substation)
 
@@ -2055,12 +2055,12 @@ def assert_grid_correct_cls(cls):
             zip(cls.line_or_to_subid, cls.line_or_to_sub_pos)
         ):
             if sub_pos >= cls.sub_info[sub_id]:
-                raise IncorrectPositionOfLines("for line {} at origin end".format(i))
+                raise IncorrectPositionOfLines("for line {} at origin side".format(i))
         for i, (sub_id, sub_pos) in enumerate(
             zip(cls.line_ex_to_subid, cls.line_ex_to_sub_pos)
         ):
             if sub_pos >= cls.sub_info[sub_id]:
-                raise IncorrectPositionOfLines("for line {} at extremity end".format(i))
+                raise IncorrectPositionOfLines("for line {} at extremity side".format(i))
         for i, (sub_id, sub_pos) in enumerate(
             zip(cls.storage_to_subid, cls.storage_to_sub_pos)
         ):
@@ -2860,9 +2860,9 @@ def get_obj_connect_to(cls, _sentinel=None, substation_id=None):
                    sub_id, env.name_load[dict_["loads_id"]]))
             print("The names of the generators connected to substation {} are: {}".format(
                    sub_id, env.name_gen[dict_["generators_id"]]))
-            print("The powerline whose origin end is connected to substation {} are: {}".format(
+            print("The powerline whose origin side is connected to substation {} are: {}".format(
                    sub_id, env.name_line[dict_["lines_or_id"]]))
-            print("The powerline whose extremity end is connected to substation {} are: {}".format(
+            print("The powerline whose extremity side is connected to substation {} are: {}".format(
                    sub_id, env.name_line[dict_["lines_ex_id"]]))
             print("The storage units connected to substation {} are: {}".format(
                    sub_id, env.name_line[dict_["storages_id"]]))
@@ -2958,10 +2958,10 @@ def get_obj_substations(cls, _sentinel=None, substation_id=None):
               1. column 0: the id of the substation
               2. column 1: -1 if this object is not a load, or `LOAD_ID` if this object is a load (see example)
               3. column 2: -1 if this object is not a generator, or `GEN_ID` if this object is a generator (see example)
-              4. column 3: -1 if this object is not the origin end of a line, or `LOR_ID` if this object is the
-                 origin end of a powerline(see example)
-              5. column 4: -1 if this object is not a extremity end, or `LEX_ID` if this object is the extremity
-                 end of a powerline
+              4. column 3: -1 if this object is not the origin side of a line, or `LOR_ID` if this object is the
+                 origin side of a powerline(see example)
+              5. column 4: -1 if this object is not an extremity side, or `LEX_ID` if this object is the extremity
+                 side of a powerline
               6. column 5: -1 if this object is not a storage unit, or `STO_ID` if this object is one
 
         Examples
@@ -2984,14 +2984,14 @@ def get_obj_substations(cls, _sentinel=None, substation_id=None):
             # we can also get that:
             # 1. this is not a load (-1 at position 1 - so 2nd component)
             # 2. this is not a generator (-1 at position 2 - so 3rd component)
-            # 3. this is not the origin end of a powerline (-1 at position 3)
-            # 4. this is the extremity end of powerline 0 (there is a 0 at position 4)
+            # 3. this is not the origin side of a powerline (-1 at position 3)
+            # 4. this is the extremity side of powerline 0 (there is a 0 at position 4)
             # 5. this is not a storage unit (-1 at position 5 - so last component)
 
             # likewise, the second element connected at this substation is:
             mat[1,:]
             # array([ 1, -1, -1,  2, -1, -1], dtype=int32)
-            # it represents the origin end of powerline 2
+            # it represents the origin side of powerline 2
 
             # the 5th element connected at this substation is:
             mat[4,:]
@@ -3055,10 +3055,10 @@ def get_lines_id(cls, _sentinel=None, from_=None, to_=None):
             Internal, do not use
 
         from_: ``int``
-            Id the substation to which the origin end of the powerline to look for should be connected to
+            Id of the substation to which the origin side of the powerline to look for should be connected to
 
         to_: ``int``
-            Id the substation to which the extremity end of the powerline to look for should be connected to
+            Id of the substation to which the extremity side of the powerline to look for should be connected to
 
         Returns
         -------

From 557338ac159cfc0fc258047533a03e668af1de1b Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Wed, 6 Mar 2024 10:43:52 +0100
Subject: [PATCH 3/6] stopping there for the docs about the MDP for now [skip
 ci]

---
 docs/mdp.rst              | 231 ++++++++++++++++++++++++++++++++++++--
 docs/parameters.rst       |   5 +-
 docs/rules.rst            |   2 +
 docs/voltagecontroler.rst |   3 +
 grid2op/Parameters.py     |   6 +-
 5 files changed, 235 insertions(+), 12 deletions(-)

diff --git a/docs/mdp.rst b/docs/mdp.rst
index fef448e0..64e6ed46 100644
--- a/docs/mdp.rst
+++ b/docs/mdp.rst
@@ -515,7 +515,9 @@ agent in the :ref:`observation_module` page of the documentation.) The
 And, to make it "Markovian" we also need to include :
 
 - the (constant) values of :math:`\mathcal{S}_{\text{im}}^{(\text{in})}` that 
-  are not "part of" :math:`\mathcal{X}`. This might include some physical
+  are not "part of" :math:`\mathcal{X}` (more information about that in 
+  the paragraph ":ref:`mdp-call-simulator-step`" of this documentation). 
+  This might include some physical
   parameters of some elements of the grid (like transformers or powerlines) or
   some other parameters of the solver controlling either the equations to be 
   solved or the solver to use etc. \*
@@ -531,12 +533,189 @@ And, to make it "Markovian" we also need to include :
   you need to be aware of the implementation of the :class:`grid2op.Backend.Backend`
   you are using.
 
+.. note::
+  In this modeling, by design, the agent sees everything that will happen in the
+  future, without uncertainties. To make a parallel with a "maze" environment, 
+  the agent would see the full maze and its position at each step.
+
+  This is of course not fully representative of the daily powergrid operations, 
+  where the operators cannot see exactly the future. To make this modeling 
+  closer to the reality, you can refer to the paragraphs :ref:`pomdp` and :ref:`non-pomdp`
+  below.
+
 .. _mdp-transition-kernel-def:
 
 Transition Kernel
 ~~~~~~~~~~~~~~~~~~~
 
-TODO 
+In this subsection we will describe the so called transition kernel, this is the function that given a 
+state :math:`s` and an action :math:`a` gives a probability distribution over all possible next state 
+:math:`s' \in \mathcal{S}`.
+
+In this subsection, we chose to model this transition kernel as a deterministic
+function (which is equivalent to saying that the probability distribution over :math:`\mathcal{S}` is 
+a Dirac distribution).
+
+.. note::
+  The removal of the :math:`\mathcal{X}` matrix in the "observation space" see section :ref:`pomdp` or the 
+  rewriting of the MDP to stay in the "fully observable setting" (see section :ref:`non-pomdp`) or the
+  introduction of the "opponent" described in section :ref:`mdp-opponent` are all things that "makes" this
+  "transition kernel" probabilistic. We chose the simplicity in presenting it in a fully deterministic
+  fashion.
+
+So let's write what the next state is given the current state :math:`s \in \mathcal{S}` and the action of 
+the agent :math:`a \in \mathcal{A}`. To do that we split the computation in different steps explained below.
+
+.. note::
+  To be exhaustive, if the actual state is :math:`s = s_{\emptyset}` then the :math:`s' = s_{\emptyset}` is 
+  returned regardless of the action and the steps described below are skipped.
+
+If the end of the episode is reached then :math:`s' = s_{\emptyset}` is returned.
+
+Step 1: legal vs illegal
++++++++++++++++++++++++++
+
+The first step is to check if the action is :blue:`legal` or not. This depends on the :blue:`rules` (see the 
+dedicated page :ref:`rule-module` of the documentation) and the :blue:`parameters` (more information at the page 
+:ref:`parameters-module` of the documentation). There are basically two cases:
+
+#. the action :math:`a` is legal: then proceed to next step
+#. the action :math:`a` is not, then replace the action by `do nothing`, an action that does not 
+   affect anything and proceed to next step
+
+.. _mdp-read-x-values:
+
+Step 2: load next environment values
++++++++++++++++++++++++++++++++++++++
+
+This is also rather straightforward, the current index is updated (+1 is added) and this 
+new index is used to find the "optimal" (from a market or a central authority perspective)
+value each producer produces to satisfy the demand of each consumer (in this case large cities or
+companies). This information is stored in the :math:`\mathcal{X}` matrix.
+
+.. _mdp-redispatching-step:
+
+Step 3: Compute the generators setpoints and handle storage units
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+The next step of the environment is to handle the "continuous" part of the action (*eg* "storage_p", 
+"curtail" or "redisp") and to make sure a suitable setpoint can be reached for each generator (you
+can refer to the pages :ref:`storage-mod-el` and :ref:`generator-mod-el` of this documentation 
+for more information).
+
+There are two alternatives:
+
+#. either the physical constraints cannot be met (there exist no feasible solutions 
+   for at least one generator), and in this case the next state is the 
+   terminal state :math:`s_{\emptyset}` (ignore all the steps below)
+#. or they can be met. In this case the "target generator values" is computed as well
+   as the "target storage unit values"
+
+.. note::
+  There is a parameter called :blue:`LIMIT_INFEASIBLE_CURTAILMENT_STORAGE_ACTION` that will
+  try to avoid, as best as possible, falling into an infeasible solution. It does so by limiting 
+  the amount of power that is curtailed or injected in the grid from the storage units: it 
+  modifies the actions :math:`a`.
+
+.. _mdp-call-simulator-step:
+
+Step 4: Call the simulator
++++++++++++++++++++++++++++++++
+
+At this stage then (assuming the physical constraints can be met), the setpoint for the following variables
+is known:
+
+- the status of the lines is deduced from the "change_line_status" and "set_line_status" and their
+  status in :math:`s` (the current state). If there are maintenance (or attacks, see section 
+  :ref:`mdp-opponent`) they can also disconnect powerlines.
+- the busbar to which each element is connected is also deduced from the "change_bus" and 
+  "set_bus" part of the action
+- the consumption active and reactive values have been computed from the :math:`\mathcal{X}` 
+  values at previous step
+- the generator active values have just been computed after taking into account the redispatching,
+  curtailment and storage (at this step)
+- the voltage setpoint for each generator is either read from :math:`\mathcal{X}` or 
+  deduced from the above data by the "voltage controler" (more information on :ref:`voltage-controler-module`)
+
+All this should be part of the input solver data :math:`\mathcal{S}_{\text{im}}^{(\text{in})}`. If not, then the
+solver cannot be used unfortunately...
+
+With that (and the other data used by the solver and included in the space, see paragraph 
+:ref:`mdp-state-space-def` of this documentation), the necessary data is shaped (by the Backend) into 
+a valid :math:`s_{\text{im}}^{(\text{in})} \in \mathcal{S}_{\text{im}}^{(\text{in})}`.
+
+The solver is then called and there are 2 alternatives (again):
+
+#. either the solver cannot find a feasible solution (it "diverges"), and in this case the next state is the 
+   terminal state :math:`s_{\emptyset}` (ignore all the steps below)
+#. or a physical solution is found and the process carries out in the next steps
+
+.. _mdp-protection-emulation-step:
+
+Step 5: Emulation of the "protections"
+++++++++++++++++++++++++++++++++++++++++++
+
+At this stage an object :math:`s_{\text{im}}^{(\text{out})} \in \mathcal{S}_{\text{im}}^{(\text{out})}` 
+has been computed by the solver. 
+
+The first step performed by grid2op is to look at the flows (in Amps) on the powerlines (these data
+are part of :math:`s_{\text{im}}^{(\text{out})}`) and to check whether they meet some constraints 
+defined in the :blue:`parameters` (mainly if for some powerline the flow is too high, or if it has been 
+too high for too long, see :blue:`HARD_OVERFLOW_THRESHOLD`, :blue:`NB_TIMESTEP_OVERFLOW_ALLOWED` and 
+:blue:`NO_OVERFLOW_DISCONNECTION`). If some powerlines are disconnected at this step, then the
+"setpoint" sent to the backend at the previous step is modified and it goes back 
+to :ref:`mdp-call-simulator-step`.
+
+.. note::
+  The simulator can already handle a real simulation of these "protections". This "outer loop"
+  is because some simulators do not do it.
+
+.. note::
+  For the purist, this "outer loop" necessarily terminates. It is triggered when at least one 
+  powerline needs to be disconnected. And there are :green:`n_line` (finite) powerlines.
+
+Step 6: Reading back the "grid dependent" attributes
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+At this stage an object :math:`s_{\text{im}}^{(\text{out})} \in \mathcal{S}_{\text{im}}^{(\text{out})}` 
+has been computed by the solver and all the "rules" / "parameters" regarding powerlines
+are met.
+
+As discussed in the section about "state space" (see :ref:`mdp-state-space-def` for more information),
+the next state space :math:`s'` includes some part of the outcome of the solver. These data
+are then read from the  :math:`s_{\text{im}}^{(\text{out})}`, which
+includes but is not limited to the loads active values `load_p`_, 
+loads reactive values `load_q`_, voltage magnitude 
+at each loads `load_v`_, the same kind of attributes but for generators
+`gen_p`_, `gen_q`_, `gen_v`_, `gen_theta`_  and also for powerlines 
+`p_or`_, `q_or`_, `v_or`_, `a_or`_, `theta_or`_, `p_ex`_, `q_ex`_, `v_ex`_, 
+`a_ex`_, `theta_ex`_, `rho`_ etc.
+  
+
+Step 7: update the other attributes of the state space
++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Finally, the environment takes care of updating all the other "part"
+of the state space, which are:
+
+- attributes related to "redispatching" are updated as described in paragraph :ref:`mdp-redispatching-step`
+- and so are attributes related to storage units
+- the information about the date and time are loaded from the :math:`\mathcal{X}` matrix.
+
+As for the attributes related to the rules of the game, they are updated in the following way:
+
+- `timestep_overflow`_ is set to 0 for all powerlines not in overflow and increased by 1 for all the others
+- `time_before_cooldown_line`_ is reduced by 1 for all lines that have not been impacted by the action :math:`a` 
+  otherwise set to :blue:`param.NB_TIMESTEP_COOLDOWN_LINE`
+- `time_before_cooldown_sub`_ is reduced by 1 for all substations that have not been impacted by the action :math:`a` 
+  otherwise set to :blue:`param.NB_TIMESTEP_COOLDOWN_SUB`
+
+The new state :math:`s'` is then passed to the agent.
+
+.. note::
+  We remind that this process might have terminated before reaching the last step described above, for example
+  at :ref:`mdp-redispatching-step` or at :ref:`mdp-call-simulator-step` or during the 
+  emulation of the protections described at :ref:`mdp-protection-emulation-step`
 
 Reward Kernel
 ~~~~~~~~~~~~~~~~~~~
@@ -558,9 +737,12 @@ of this documentation.
 Extensions
 -----------
 
-TODO: this part of the section is still an ongoing work.
+In this last section of this page of the documentation, we dive deeper into some aspects of the grid2op MDP.
 
-Let us know if you want to contribute !
+.. note:: 
+  TODO: This part of the section is still an ongoing work.
+
+  Let us know if you want to contribute !
 
 
 .. _pomdp:
@@ -568,22 +750,55 @@ Let us know if you want to contribute !
 Partial Observatibility
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-This is the case in most grid2op environment: only some part of the environment
+This is the case in most grid2op environments: only some part of the environment
 state at time `t` :math:`s_t` are
-given to the agent in the observation at time `t` :math:`o_t`.
+given to the agent in the observation at time `t` :math:`o_t`. 
+
+Mathematically this can be modeled with the introduction of an "observation space" and an 
+"observation kernel". This kernel will only expose part of the "state space" to the agent and
+(in grid2op) is a deterministic function that depends on the environment state :math:`s'`.
 
 More specifically, in most grid2op environment (by default at least), none of the 
 physical parameters of the solvers are provided. Also, to represent better
 the daily operation in power systems, only the `t` th row of the matrix :math:`\mathcal{X}_t` 
 is given in the observation :math:`o_t`. The components :math:`\mathcal{X}_{t', i}` 
-(for :math:`\forall t' > t`) are not given.
+(for :math:`\forall t' > t`) are not given. The observation kernel in grid2op will 
+mask out some part of the "environment state" to the agent.
 
 .. _non-pomdp:
 
 Or not partial observatibility ?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-TODO remodel the grid2op MDP without the X
+If we consider that the agent is aware of the simulator used and all its "constant" (see
+paragraph :ref:`mdp-state-space-def`) part of :math:`\mathcal{S}_{\text{im}}^{(\text{in})}`
+(which are part of the simulator that are not affected by the actions of 
+the agent nor by environment) then we can model the grid2op MDP without the need 
+to use an observation kernel: it can be a regular MDP.
+
+To "remove" the need of partial observatibility, without the need to suppose that the 
+agent sees all the future we can adapt slightly the modeling which allows us to
+remove completely the :math:`\mathcal{X}` matrix :
+
+- the observation space / state space (which are equal in this setting) are the same as the 
+  one used in :ref:`pomdp`
+- the transition kernel is now stochastic. Indeed, the "next" value of the loads and generators 
+  are, in this modeling not read from a :math:`\mathcal{X}` matrix but sampled from a given 
+  distribution which replaces the step :ref:`mdp-read-x-values` of subsection 
+  :ref:`mdp-transition-kernel-def`. And once the values of these variables are sampled, 
+  the rest of the steps described there are unchanged.
+
+.. note::
+  The above holds as long as there exists a way to sample new values for gen_p, load_p, gen_v and
+  load_q that is markovian. We suppose it exists here and will not write it down.
+
+.. note::
+  Sampling from these distributions can be quite challenging and will not be covered here. 
+
+  One of the challenging parts is that the sampled generations need to meet the demand (and
+  the losses) as well as all the constraints on the generators (p_min, p_max and ramps)
+
+.. _mdp-opponent:
 
 Adversarial attacks
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/parameters.rst b/docs/parameters.rst
index f89ccc78..727a422e 100644
--- a/docs/parameters.rst
+++ b/docs/parameters.rst
@@ -1,6 +1,8 @@
+.. _parameters-module:
+
 Parameters
 ===================================
-The challenge "learning to run a power network" offers different _parameters to be customized, or to learn an
+The challenge "learning to run a power network" offers different parameters to be customized, or to learn an
 :class:`grid2op.Agent` that will perform better for example.
 
 This class is an attempt to group them all inside one single structure.
@@ -10,6 +12,7 @@ come soon.
 
 Example
 --------
+
 If you want to change the parameters it is better to do it at the creation of the environment.
 
 This can be done with:
diff --git a/docs/rules.rst b/docs/rules.rst
index 24e7c087..40ef5ac4 100644
--- a/docs/rules.rst
+++ b/docs/rules.rst
@@ -1,5 +1,7 @@
 .. currentmodule:: grid2op.Rules
 
+.. _rule-module:
+
 Rules of the Game
 ===================================
 
diff --git a/docs/voltagecontroler.rst b/docs/voltagecontroler.rst
index eb7b902f..19e39129 100644
--- a/docs/voltagecontroler.rst
+++ b/docs/voltagecontroler.rst
@@ -1,5 +1,8 @@
 .. currentmodule:: grid2op.VoltageControler
 
+.. _voltage-controler-module:
+
+
 Voltage Controler
 ===================================
 
diff --git a/grid2op/Parameters.py b/grid2op/Parameters.py
index 56e523b1..c16d9a93 100644
--- a/grid2op/Parameters.py
+++ b/grid2op/Parameters.py
@@ -22,9 +22,9 @@ class Parameters:
     Attributes
     ----------
     NO_OVERFLOW_DISCONNECTION: ``bool``
-        If set to ``True`` then the :class:`grid2op.Environment.Environment` will not disconnect powerline above their
-        thermal
-        limit. Default is ``False``
+        If set to ``True`` then the :class:`grid2op.Environment.Environment` will **NOT** disconnect powerline above their
+        thermal limit. Default is ``False``, meaning that grid2op will disconnect powerlines above their limits
+        for too long or for "too much".
 
     NB_TIMESTEP_OVERFLOW_ALLOWED: ``int``
         Number of timesteps for which a soft overflow is allowed, default 2. This means that a powerline will be

From 7155cffa42349c9c1f980f3cbe2c290a2015c9b9 Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Wed, 6 Mar 2024 11:24:29 +0100
Subject: [PATCH 4/6] ready to upgrade to version 1.10.0

---
 CHANGELOG.rst                                 |   3 +-
 docs/createbackend.rst                        |  33 +++--
 grid2op/Action/baseAction.py                  |  40 +++---
 grid2op/Backend/backend.py                    |  25 ++--
 grid2op/MakeEnv/Make.py                       |   2 +-
 grid2op/Observation/baseObservation.py        |  13 +-
 grid2op/Space/GridObjects.py                  |   4 +-
 grid2op/__init__.py                           |   2 +-
 .../gym_compat/multidiscrete_gym_actspace.py  |   4 +-
 grid2op/tests/aaa_test_backend_interface.py   | 120 +++++++++++++++++-
 grid2op/tests/helper_path_test.py             |   3 +-
 grid2op/tests/test_Agent.py                   |   2 +-
 grid2op/tests/test_GridObjects.py             |   2 +-
 13 files changed, 185 insertions(+), 68 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 3ce1abd0..dcd6cd59 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -32,7 +32,7 @@ Change Log
 - [???] properly model interconnecting powerlines
 
 
-[1.9.9] - 2024-xx-yy
+[1.10.0] - 2024-03-06
 ----------------------
 - [BREAKING] the order of the actions in `env.action_space.get_all_unitary_line_set` and 
   `env.action_space.get_all_unitary_topologies_set` might have changed (this is caused 
@@ -75,6 +75,7 @@ Change Log
 - [IMPROVED] type hints for the `gym_compat` module (more work still required in this area)
 - [IMPROVED] the `MultiDiscreteActSpace` to have one "dimension" controling all powerlines
   (see "one_line_set" and "one_line_change")
+- [IMPROVED] doc at different places, including the addition of the MDP implemented by grid2op.
 
 [1.9.8] - 2024-01-26
 ----------------------
diff --git a/docs/createbackend.rst b/docs/createbackend.rst
index 9889c05c..db767c27 100644
--- a/docs/createbackend.rst
+++ b/docs/createbackend.rst
@@ -198,8 +198,9 @@ There are 4 **__main__** types of method you need to implement if you want to us
 
 .. _grid-description:
 
-Grid description
-------------------
+load_grid: Grid description
+----------------------------
+
 In this section we explicit what attributes need to be implemented to have a valid backend instance. We focus on
 the attribute of the `Backend` you have to set. But don't forget you also need to load a powergrid and store
 it in the `_grid` attribute.
@@ -210,18 +211,16 @@ Basically the `load_grid` function would look something like:
 
     def load_grid(self, path=None, filename=None):
         # simply handles different way of inputing the data
-        if path is None and filename is None:
-            raise RuntimeError("You must provide at least one of path or file to load a powergrid.")
-        if path is None:
-            full_path = filename
-        elif filename is None:
-            full_path = path
-        else:
-            full_path = os.path.join(path, filename)
-        if not os.path.exists(full_path):
-            raise RuntimeError("There is no powergrid at \"{}\"".format(full_path))
-
-        # load the grid in your favorite format:
+        full_path = self.make_complete_path(path, filename)
+
+        # from grid2op 1.10.0 you need to call one of
+        self.can_handle_more_than_2_busbar()  # see doc for more information
+        OR
+        self.cannot_handle_more_than_2_busbar()  # see doc for more information
+        # It is important you include it at the top of this method, otherwise you
+        # will not have access to self.n_busbar_per_sub
+
+        # load the grid in your favorite format, located at `full_path`:
         self._grid = ... # the way you do that depends on the "solver" you use
 
         # and now initialize the attributes (see list bellow)
@@ -516,7 +515,7 @@ of your implementation of `load_grid` function)
 
 .. _backend-action-create-backend:
 
-BackendAction: modification
+apply_action: underlying grid modification
 ----------------------------------------------
 In this section we detail step by step how to understand the specific format used by grid2op to "inform" the backend
 on how to modify its internal state before computing a powerflow.
@@ -698,8 +697,8 @@ And of course you do the same for generators and both ends of each powerline.
 
 .. _vector-orders-create-backend:
 
-Read back the results (flows, voltages etc.)
------------------------------------------------
+***_infos() : Read back the results (flows, voltages etc.)
+--------------------------------------------------------------
 This last "technical" part concerns what can be refer to as "getters" from the backend. These functions allow to
 read back the state of the grid and expose its results to grid2op in a standardize manner.
 
diff --git a/grid2op/Action/baseAction.py b/grid2op/Action/baseAction.py
index c9b705a9..6a66c083 100644
--- a/grid2op/Action/baseAction.py
+++ b/grid2op/Action/baseAction.py
@@ -82,8 +82,8 @@ class BaseAction(GridObjects):
         - -1 -> disconnect the object.
         - 1 -> connect to bus 1
         - 2 -> connect to bus 2
-        - 3 -> connect to bus 3 (added in version 1.9.9)
-        - etc.  (added in version 1.9.9)
+        - 3 -> connect to bus 3 (added in version 1.10.0)
+        - etc.  (added in version 1.10.0)
 
     - the fifth element changes the buses to which the object is connected. It's a boolean vector interpreted as:
 
@@ -828,7 +828,7 @@ def process_grid2op_compat(cls):
             cls.dim_alerts = 0
 
         if (cls.n_busbar_per_sub >= 3) or (cls.n_busbar_per_sub == 1):
-            # only relevant for grid2op >= 1.9.9
+            # only relevant for grid2op >= 1.10.0
             # remove "change_bus" if it's there more than 3 buses (no sense: where to change it ???)
             # or if there are only one busbar (cannot change anything)
             # if there are only one busbar, the "set_bus" action can still be used
@@ -2179,7 +2179,7 @@ def update(self, dict_):
                 - 0 -> don't change anything
                 - +1 -> set to bus 1,
                 - +2 -> set to bus 2
-                - +3 -> set to bus 3 (grid2op >= 1.9.9)
+                - +3 -> set to bus 3 (grid2op >= 1.10.0)
                 - etc.
                 - -1: You can use this method to disconnect an object by setting the value to -1.
 
@@ -2187,7 +2187,7 @@ def update(self, dict_):
               True will
               change it (eg switch it from bus 1 to bus 2 or from bus 2 to bus 1). NB this is only active if the system
               has only 2 buses per substation.
-              .. versionchanged:: 1.9.9
+              .. versionchanged:: 1.10.0
                 This feature is deactivated if `act.n_busbar_per_sub >= 3` or `act.n_busbar_per_sub == 1`
 
             - "redispatch": the best use of this is to specify either the numpy array of the redispatch vector you want
@@ -4060,8 +4060,8 @@ def load_set_bus(self) -> np.ndarray:
         """
         Allows to retrieve (and affect) the busbars at which the action **set** the loads.
 
-        .. versionchanged:: 1.9.9
-            From grid2op version 1.9.9 it is possible (under some cirumstances, depending on how
+        .. versionchanged:: 1.10.0
+            From grid2op version 1.10.0 it is possible (under some circumstances, depending on how
             the environment is created) to set the busbar to a number >= 3, depending on the value
             of `type(act).n_busbar_per_sub`.
             
@@ -4075,8 +4075,8 @@ def load_set_bus(self) -> np.ndarray:
             * -1 the action disconnect the load
             * 1 the action set the load to busbar 1
             * 2 the action set the load to busbar 2
-            * 3 the action set the load to busbar 3 (grid2op >= 1.9.9)
-            * etc. (grid2op >= 1.9.9)
+            * 3 the action set the load to busbar 3 (grid2op >= 1.10.0)
+            * etc. (grid2op >= 1.10.0)
 
         Examples
         --------
@@ -4131,8 +4131,8 @@ def gen_set_bus(self) -> np.ndarray:
         """
         Allows to retrieve (and affect) the busbars at which the action **set** the generator units.
 
-        .. versionchanged:: 1.9.9
-            From grid2op version 1.9.9 it is possible (under some cirumstances, depending on how
+        .. versionchanged:: 1.10.0
+            From grid2op version 1.10.0 it is possible (under some circumstances, depending on how
             the environment is created) to set the busbar to a number >= 3, depending on the value
             of `type(act).n_busbar_per_sub`.
             
@@ -4146,8 +4146,8 @@ def gen_set_bus(self) -> np.ndarray:
             * -1 the action disconnect the generator
             * 1 the action set the generator to busbar 1
             * 2 the action set the generator to busbar 2
-            * 3 the action set the generator to busbar 3 (grid2op >= 1.9.9)
-            * etc. (grid2op >= 1.9.9)
+            * 3 the action set the generator to busbar 3 (grid2op >= 1.10.0)
+            * etc. (grid2op >= 1.10.0)
 
         Examples
         --------
@@ -4271,8 +4271,8 @@ def storage_set_bus(self) -> np.ndarray:
         """
         Allows to retrieve (and affect) the busbars at which the action **set** the storage units.
 
-        .. versionchanged:: 1.9.9
-            From grid2op version 1.9.9 it is possible (under some cirumstances, depending on how
+        .. versionchanged:: 1.10.0
+            From grid2op version 1.10.0 it is possible (under some circumstances, depending on how
             the environment is created) to set the busbar to a number >= 3, depending on the value
             of `type(act).n_busbar_per_sub`.
             
@@ -4286,8 +4286,8 @@ def storage_set_bus(self) -> np.ndarray:
             * -1 the action disconnect the storage unit
             * 1 the action set the storage unit to busbar 1
             * 2 the action set the storage unit to busbar 2
-            * 3 the action set the storage unit to busbar 3 (grid2op >= 1.9.9)
-            * etc. (grid2op >= 1.9.9)
+            * 3 the action set the storage unit to busbar 3 (grid2op >= 1.10.0)
+            * etc. (grid2op >= 1.10.0)
 
         Examples
         --------
@@ -4345,8 +4345,8 @@ def line_or_set_bus(self) -> np.ndarray:
         """
         Allows to retrieve (and affect) the busbars at which the action **set** the lines (origin side).
 
-        .. versionchanged:: 1.9.9
-            From grid2op version 1.9.9 it is possible (under some cirumstances, depending on how
+        .. versionchanged:: 1.10.0
+            From grid2op version 1.10.0 it is possible (under some circumstances, depending on how
             the environment is created) to set the busbar to a number >= 3, depending on the value
             of `type(act).n_busbar_per_sub`.
             
@@ -4360,7 +4360,7 @@ def line_or_set_bus(self) -> np.ndarray:
             * -1 the action disconnect the line (origin side)
             * 1 the action set the line (origin side) to busbar 1
             * 2 the action set the line (origin side) to busbar 2
-            * 3 the action set the line (origin side) to busbar 3 (grid2op >= 1.9.9)
+            * 3 the action set the line (origin side) to busbar 3 (grid2op >= 1.10.0)
             * etc.
 
         Examples
diff --git a/grid2op/Backend/backend.py b/grid2op/Backend/backend.py
index db8a6cf0..820f41e8 100644
--- a/grid2op/Backend/backend.py
+++ b/grid2op/Backend/backend.py
@@ -171,16 +171,16 @@ def __init__(self,
         for k, v in kwargs.items():
             self._my_kwargs[k] = v
         
-        #: .. versionadded:: 1.9.9
+        #: .. versionadded:: 1.10.0
         #:
         #: A flag to indicate whether the :func:`Backend.cannot_handle_more_than_2_busbar`
         #: or the :func:`Backend.cannot_handle_more_than_2_busbar`
         #: has been called when :func:`Backend.load_grid` was called.
-        #: Starting from grid2op 1.9.9 this is a requirement (to 
+        #: Starting from grid2op 1.10.0 this is a requirement (to 
         #: ensure backward compatibility)
         self._missing_two_busbars_support_info: bool = True
         
-        #: .. versionadded:: 1.9.9
+        #: .. versionadded:: 1.10.0
         #: 
         #: There is a difference between this and the class attribute.
         #: You should not worry about the class attribute of the backend in :func:`Backend.apply_action`
@@ -188,9 +188,9 @@ def __init__(self,
     
     def can_handle_more_than_2_busbar(self):
         """
-        .. versionadded:: 1.9.9
+        .. versionadded:: 1.10.0
         
-        This function should be called once in `load_grid` if your backend is able
+        This function should be called once in :func:`Backend.load_grid` if your backend is able
         to handle more than 2 busbars per substation.
         
         If not called, then the `environment` will not be able to use more than 2 busbars per substations.
@@ -199,7 +199,7 @@ def can_handle_more_than_2_busbar(self):
             :func:`Backend.cannot_handle_more_than_2_busbar`
 
         .. note::
-            From grid2op 1.9.9 it is preferable that your backend calls one of
+            From grid2op 1.10.0 it is preferable that your backend calls one of
             :func:`Backend.can_handle_more_than_2_busbar` or 
             :func:`Backend.cannot_handle_more_than_2_busbar`.
             
@@ -216,9 +216,9 @@ def can_handle_more_than_2_busbar(self):
     
     def cannot_handle_more_than_2_busbar(self):
         """
-        .. versionadded:: 1.9.9
+        .. versionadded:: 1.10.0
         
-        This function should be called once in `load_grid` if your backend is **NOT** able
+        This function should be called once in :func:`Backend.load_grid` if your backend is **NOT** able
         to handle more than 2 busbars per substation.
         
         If not called, then the `environment` will not be able to use more than 2 busbars per substations.
@@ -227,7 +227,7 @@ def cannot_handle_more_than_2_busbar(self):
             :func:`Backend.cnot_handle_more_than_2_busbar`
 
         .. note::
-            From grid2op 1.9.9 it is preferable that your backend calls one of
+            From grid2op 1.10.0 it is preferable that your backend calls one of
             :func:`Backend.can_handle_more_than_2_busbar` or 
             :func:`Backend.cannot_handle_more_than_2_busbar`.
             
@@ -241,7 +241,10 @@ def cannot_handle_more_than_2_busbar(self):
         """
         self._missing_two_busbars_support_info = False
         if type(self).n_busbar_per_sub != DEFAULT_N_BUSBAR_PER_SUB:
-            warnings.warn("You asked in `make` function to pass ")
+            warnings.warn("You asked in `make` function to have more than 2 busbar per substation. It is "
+                          f"not possible with a backend of type {type(self)}. To "
+                          "'fix' this issue, you need to change the implementation of your backend or "
+                          "upgrade it to a newer version.")
         self.n_busbar_per_sub = DEFAULT_N_BUSBAR_PER_SUB
     
     def make_complete_path(self,
@@ -1943,7 +1946,7 @@ def assert_grid_correct(self) -> None:
 
         if self._missing_two_busbars_support_info:
             warnings.warn("The backend implementation you are using is probably too old to take advantage of the "
-                          "new feature added in grid2op 1.9.9: the possibility "
+                          "new feature added in grid2op 1.10.0: the possibility "
                           "to have more than 2 busbars per substations (or not). "
                           "To silence this warning, you can modify the `load_grid` implementation "
                           "of your backend and either call:\n"
diff --git a/grid2op/MakeEnv/Make.py b/grid2op/MakeEnv/Make.py
index 3cde1b55..4692c674 100644
--- a/grid2op/MakeEnv/Make.py
+++ b/grid2op/MakeEnv/Make.py
@@ -290,7 +290,7 @@ def make(
     .. versionchanged:: 1.9.3
         Remove the possibility to use this function with arguments (force kwargs)
     
-    .. versionadded:: 1.9.9
+    .. versionadded:: 1.10.0
         The `n_busbar` parameters
         
     Parameters
diff --git a/grid2op/Observation/baseObservation.py b/grid2op/Observation/baseObservation.py
index 1a403a9c..be05db50 100644
--- a/grid2op/Observation/baseObservation.py
+++ b/grid2op/Observation/baseObservation.py
@@ -2257,7 +2257,7 @@ def get_energy_graph(self) -> networkx.Graph:
         Convert this observation as a networkx graph. This graph is the graph "seen" by
         "the electron" / "the energy" of the power grid.
 
-        .. versionchanged:: 1.9.9
+        .. versionchanged:: 1.10.0
             Addition of the attribute `local_bus_id` and `global_bus_id` for the nodes of the returned graph.
             
             `local_bus_id` give the local bus id (from 1 to `obs.n_busbar_per_sub`) id of the 
@@ -2266,7 +2266,6 @@ def get_energy_graph(self) -> networkx.Graph:
             `global_bus_id` give the global bus id (from 0 to `obs.n_busbar_per_sub * obs.n_sub - 1`) id of the 
             bus represented by this node.
             
-        .. versionchanged:: 1.9.9
             Addition of the attribute `global_bus_or` and `global_bus_ex` for the edges of the returned graph.
             
             These provides the global id of the `origin` / `ext` side to which powerline(s) represented by
@@ -2291,10 +2290,10 @@ def get_energy_graph(self) -> networkx.Graph:
             - `cooldown`: how much longer you need to wait before being able to merge / split or change this node
             - 'sub_id': the id of the substation to which it is connected (typically between `0` and `obs.n_sub - 1`)
             - 'local_bus_id': the local bus id (from 1 to `obs.n_busbar_per_sub`) of the bus represented by this node
-              (new in version 1.9.9)
+              (new in version 1.10.0)
             - 'global_bus_id': the global bus id (from 0 to `obs.n_busbar_per_sub * obs.n_sub - 1`) 
               of the bus represented by this node
-              (new in version 1.9.9)
+              (new in version 1.10.0)
             - `cooldown` : the time you need to wait (in number of steps) before being able to act on the
               substation to which this bus is connected.
             - (optional) `theta`: the voltage angle (in degree) at this nodes
@@ -2329,11 +2328,11 @@ def get_energy_graph(self) -> networkx.Graph:
             - 'global_bus_or': the global bus id (from 0 to `obs.n_busbar_per_sub * obs.n_sub - 1`) 
               of the bus to which the origin side of the line(s) represented by this edge
               is (are) connected
-              (new in version 1.9.9)
+              (new in version 1.10.0)
             - 'global_bus_ex': the global bus id (from 0 to `obs.n_busbar_per_sub * obs.n_sub - 1`) 
               of the bus to which the ext side of the line(s) represented by this edge
               is (are) connected
-              (new in version 1.9.9)
+              (new in version 1.10.0)
             - (optional) `theta_or`: voltage angle at the "or" bus (in deg)
             - (optional) `theta_ex`: voltage angle at the "ex" bus (in deg)
 
@@ -4749,7 +4748,7 @@ def get_back_to_ref_state(
         to busbar 1, no redispatching, no curtailment)
         
         
-        .. versionadded:: 1.9.9
+        .. versionadded:: 1.10.0
         
         This function uses the method of the underlying action_space used 
         for the forecasts.
diff --git a/grid2op/Space/GridObjects.py b/grid2op/Space/GridObjects.py
index 948533ef..8388baf7 100644
--- a/grid2op/Space/GridObjects.py
+++ b/grid2op/Space/GridObjects.py
@@ -195,7 +195,7 @@ class GridObjects:
         number of independant busbars for all substations [*class attribute*]. It's 2 by default
         or if the implementation of the backend does not support this feature.
         
-        .. versionadded:: 1.9.9
+        .. versionadded:: 1.10.0
 
     n_line: :class:`int`
         number of powerlines in the powergrid [*class attribute*]
@@ -2810,7 +2810,7 @@ def process_grid2op_compat(cls):
             cls.alertable_line_names = []
             cls.alertable_line_ids = []
             
-        if glop_ver < version.parse("1.9.9.dev0"):
+        if glop_ver < version.parse("1.10.0"):
             # this feature did not exists before
             # I need to set it to the default if set elsewhere
             cls.n_busbar_per_sub = DEFAULT_N_BUSBAR_PER_SUB
diff --git a/grid2op/__init__.py b/grid2op/__init__.py
index c3c4bb23..f8ec72c3 100644
--- a/grid2op/__init__.py
+++ b/grid2op/__init__.py
@@ -11,7 +11,7 @@
 Grid2Op
 
 """
-__version__ = '1.9.9.dev0'
+__version__ = '1.10.0.dev1'
 
 __all__ = [
     "Action",
diff --git a/grid2op/gym_compat/multidiscrete_gym_actspace.py b/grid2op/gym_compat/multidiscrete_gym_actspace.py
index e463f8f3..60999fd9 100644
--- a/grid2op/gym_compat/multidiscrete_gym_actspace.py
+++ b/grid2op/gym_compat/multidiscrete_gym_actspace.py
@@ -70,10 +70,10 @@ class __AuxMultiDiscreteActSpace:
     - "one_sub_change": 1 single dimension. Same as above.
     - "one_line_set": 1 single dimension. In this type of representation, you have one dimension with `1 + 2 * n_line`
       elements: first is "do nothing", then next elements control the force connection or disconnection
-      of the powerlines (new in version 1.9.9)
+      of the powerlines (new in version 1.10.0)
     - "one_line_change": 1 single dimension. In this type of representation, you have `1 + n_line` possibility
       for this element. First one is "do nothing" then it controls the change of status of 
-      any given line (new in version 1.9.9).
+      any given line (new in version 1.10.0).
 
     .. warning::
 
diff --git a/grid2op/tests/aaa_test_backend_interface.py b/grid2op/tests/aaa_test_backend_interface.py
index 5bb4bc3c..8045bc15 100644
--- a/grid2op/tests/aaa_test_backend_interface.py
+++ b/grid2op/tests/aaa_test_backend_interface.py
@@ -38,9 +38,9 @@ def aux_get_env_name(self):
         """do not run nor modify ! (used for this test class only)"""
         return "BasicTest_load_grid_" + type(self).__name__
 
-    def aux_make_backend(self) -> Backend:
+    def aux_make_backend(self, n_busbar=2) -> Backend:
         """do not run nor modify ! (used for this test class only)"""
-        backend = self.make_backend_with_glue_code()
+        backend = self.make_backend_with_glue_code(n_busbar=n_busbar)
         backend.load_grid(self.get_path(), self.get_casefile())
         backend.load_redispacthing_data("tmp")  # pretend there is no generator
         backend.load_storage_data(self.get_path())
@@ -1555,4 +1555,118 @@ def test_28_topo_vect_set(self):
                                        el_nm, el_key, el_pos_topo_vect)
         else:
              warnings.warn(f"{type(self).__name__} test_28_topo_vect_set: This test is not performed in depth as your backend does not support storage units (or there are none on the grid)")
-            
\ No newline at end of file
+
+    def test_29_xxx_handle_more_than_2_busbar_called(self):    
+        """Tests that at least one of the functions:
+        
+        - :func:`grid2op.Backend.Backend.can_handle_more_than_2_busbar`
+        - :func:`grid2op.Backend.Backend.cannot_handle_more_than_2_busbar`
+        
+        has been called in the :func:`grid2op.Backend.Backend.load_grid`
+        implementation.
+        
+        This test supposes that :
+        
+        - backend.load_grid(...) is implemented
+        
+        .. versionadded:: 1.10.0
+        
+        """
+        self.skip_if_needed()
+        backend = self.aux_make_backend()
+        assert not backend._missing_two_busbars_support_info
+    
+    def test_30_n_busbar_per_sub_ok(self):    
+        """Tests that your backend can properly handle more than
+        2 busbars (only applies if your backend supports the feature): basically that
+        objects can be moved to busbar 3 without trouble.
+        
+        This test supposes that :
+        
+        - backend.load_grid(...) is implemented
+        - backend.runpf() (AC mode) is implemented
+        - backend.apply_action() for all types of action
+        - backend.reset() is implemented
+        - backend.get_topo_vect() is implemented       
+        
+        .. versionadded:: 1.10.0
+        
+        """    
+        self.skip_if_needed()
+        backend = self.aux_make_backend(n_busbar=3)
+        cls = type(backend)
+        if cls.n_busbar_per_sub != 3:
+            self.skipTest("Your backend does not support more than 2 busbars.")
+        
+        res = backend.runpf(is_dc=False)
+        assert res[0],  f"Your backend diverged in AC after loading the grid state, error was {res[1]}"    
+        topo_vect_orig = self._aux_check_topo_vect(backend)
+        
+        # line or
+        line_id = 0
+        busbar_id = 3
+        backend.reset(self.get_path(), self.get_casefile())
+        action = type(backend)._complete_action_class()
+        action.update({"set_bus": {"lines_or_id": [(line_id, busbar_id)]}})
+        bk_act = type(backend).my_bk_act_class()
+        bk_act += action
+        backend.apply_action(bk_act)
+        res = backend.runpf(is_dc=False)  
+        assert res[0],  f"Your backend diverged in AC after setting a line (or side) on busbar 3, error was {res[1]}"    
+        topo_vect = self._aux_check_topo_vect(backend)
+        error_msg = (f"Line {line_id} (or. side) has been moved to busbar {busbar_id}, yet according to 'topo_vect' "
+                     f"is still connected (origin side) to busbar {topo_vect[cls.line_or_pos_topo_vect[line_id]]}")
+        assert topo_vect[cls.line_or_pos_topo_vect[line_id]] == busbar_id, error_msg
+        
+        # line ex
+        line_id = 0
+        busbar_id = 3
+        backend.reset(self.get_path(), self.get_casefile())
+        action = type(backend)._complete_action_class()
+        action.update({"set_bus": {"lines_ex_id": [(line_id, busbar_id)]}})
+        bk_act = type(backend).my_bk_act_class()
+        bk_act += action
+        backend.apply_action(bk_act)
+        res = backend.runpf(is_dc=False)  
+        assert res[0],  f"Your backend diverged in AC after setting a line (ex side) on busbar 3, error was {res[1]}"    
+        topo_vect = self._aux_check_topo_vect(backend)
+        error_msg = (f"Line {line_id} (ex. side) has been moved to busbar {busbar_id}, yet according to 'topo_vect' "
+                     f"is still connected (ext side) to busbar {topo_vect[cls.line_ex_pos_topo_vect[line_id]]}")
+        assert topo_vect[cls.line_ex_pos_topo_vect[line_id]] == busbar_id, error_msg
+        
+        # load
+        backend.reset(self.get_path(), self.get_casefile())
+        busbar_id = 3
+        nb_el = cls.n_load
+        el_to_subid = cls.load_to_subid
+        el_nm = "load"
+        el_key = "loads_id"
+        el_pos_topo_vect = cls.load_pos_topo_vect
+        self._aux_check_el_generic(backend, busbar_id, nb_el, el_to_subid, 
+                                   el_nm, el_key, el_pos_topo_vect)
+        
+        # generator
+        backend.reset(self.get_path(), self.get_casefile())
+        busbar_id = 3
+        nb_el = cls.n_gen
+        el_to_subid = cls.gen_to_subid
+        el_nm = "generator"
+        el_key = "generators_id"
+        el_pos_topo_vect = cls.gen_pos_topo_vect
+        self._aux_check_el_generic(backend, busbar_id, nb_el, el_to_subid, 
+                                   el_nm, el_key, el_pos_topo_vect)
+        
+        # storage
+        if cls.n_storage > 0:
+            backend.reset(self.get_path(), self.get_casefile())
+            busbar_id = 3
+            nb_el = cls.n_storage
+            el_to_subid = cls.storage_to_subid
+            el_nm = "storage"
+            el_key = "storages_id"
+            el_pos_topo_vect = cls.storage_pos_topo_vect
+            self._aux_check_el_generic(backend, busbar_id, nb_el, el_to_subid, 
+                                       el_nm, el_key, el_pos_topo_vect)
+        else:
+             warnings.warn(f"{type(self).__name__} test_30_n_busbar_per_sub_ok: This test is not performed in depth as your backend does not support storage units (or there are none on the grid)")
+        
\ No newline at end of file
diff --git a/grid2op/tests/helper_path_test.py b/grid2op/tests/helper_path_test.py
index 59bf81ed..e9f5efc3 100644
--- a/grid2op/tests/helper_path_test.py
+++ b/grid2op/tests/helper_path_test.py
@@ -67,11 +67,12 @@ class MakeBackend(ABC, HelperTests):
     def make_backend(self, detailed_infos_for_cascading_failures=False) -> Backend:
         pass
 
-    def make_backend_with_glue_code(self, detailed_infos_for_cascading_failures=False, extra_name="") -> Backend:
+    def make_backend_with_glue_code(self, detailed_infos_for_cascading_failures=False, extra_name="", n_busbar=2) -> Backend:
         Backend._clear_class_attribute()
         bk = self.make_backend(detailed_infos_for_cascading_failures=detailed_infos_for_cascading_failures)
         type(bk)._clear_grid_dependant_class_attributes()
         type(bk).set_env_name(type(self).__name__ + extra_name)
+        type(bk).set_n_busbar_per_sub(n_busbar)
         return bk
     
     def get_path(self) -> str:
diff --git a/grid2op/tests/test_Agent.py b/grid2op/tests/test_Agent.py
index f66c7d5b..007a0fbb 100644
--- a/grid2op/tests/test_Agent.py
+++ b/grid2op/tests/test_Agent.py
@@ -143,7 +143,7 @@ def test_2_busswitch(self):
         expected_reward = dt_float(12277.632)
         expected_reward = dt_float(12076.35644531 / 12.)
         # 1006.363037109375
-        #: Breaking change in 1.9.9: topology are not in the same order
+        #: Breaking change in 1.10.0: topology are not in the same order
         expected_reward = dt_float(1006.34924)  
         assert (
             np.abs(cum_reward - expected_reward) <= self.tol_one
diff --git a/grid2op/tests/test_GridObjects.py b/grid2op/tests/test_GridObjects.py
index 63f4f2f1..5de75ab8 100644
--- a/grid2op/tests/test_GridObjects.py
+++ b/grid2op/tests/test_GridObjects.py
@@ -155,7 +155,7 @@ def test_auxilliary_func(self):
     
     def test_topo_vect_element(self):
         """
-        .. newinversion:: 1.9.9
+        .. versionadded:: 1.10.0
             Test this utilitary function 
         """
         with warnings.catch_warnings():

From 35440329b3725b8768d1205fd1d1de9f2703bbba Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Wed, 6 Mar 2024 11:45:15 +0100
Subject: [PATCH 5/6] fixing a typo, let's say

---
 grid2op/Space/GridObjects.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grid2op/Space/GridObjects.py b/grid2op/Space/GridObjects.py
index 8388baf7..38d0cdc6 100644
--- a/grid2op/Space/GridObjects.py
+++ b/grid2op/Space/GridObjects.py
@@ -2810,7 +2810,7 @@ def process_grid2op_compat(cls):
             cls.alertable_line_names = []
             cls.alertable_line_ids = []
             
-        if glop_ver < version.parse("1.10.0"):
+        if glop_ver < version.parse("1.10.0.dev0"):
             # this feature did not exists before
             # I need to set it to the default if set elsewhere
             cls.n_busbar_per_sub = DEFAULT_N_BUSBAR_PER_SUB

From a9ffd3cf42a2ae23a62450f7cd777accf61e52ee Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin <benjamin.donnot@rte-france.com>
Date: Wed, 6 Mar 2024 13:43:27 +0100
Subject: [PATCH 6/6] fixing the automatic test suite

---
 grid2op/tests/aaa_test_backend_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grid2op/tests/aaa_test_backend_interface.py b/grid2op/tests/aaa_test_backend_interface.py
index 8045bc15..8f01f0b6 100644
--- a/grid2op/tests/aaa_test_backend_interface.py
+++ b/grid2op/tests/aaa_test_backend_interface.py
@@ -594,7 +594,7 @@ def _aux_check_topo_vect(self, backend : Backend):
         assert len(topo_vect) == dim_topo, (f"backend.get_topo_vect() should return a vector of size 'dim_topo' "
                                             f"({dim_topo}) but found size is {len(topo_vect)}. "
                                             f"Remember: shunt are not part of the topo_vect")
-        assert np.all(topo_vect <= 2), (f"For simple environment, we suppose there are 2 buses per substation / voltage levels. "
+        assert np.all(topo_vect <= type(backend).n_busbar_per_sub), (f"For simple environment, we suppose there are 2 buses per substation / voltage levels. "
                                         f"topo_vect is supposed to give the id of the busbar (in the substation) to "
                                         f"which the element is connected. This cannot be {np.max(topo_vect)}."
                                         f"NB: this test is expected to fail if you test on a grid where more "