Updated docstrings using darglint (#2827)
* Updated docstrings using darglint (ignoring rules 402 and 202) and split overflowing lines across multiple lines

* Remove abstract method decorators, leaving them for a future PR

* Add `from __future__ import annotations` for Python 3.7+ annotation support

* Added missing bracket

* Fix minor docstring tables
pseudo-rnd-thoughts authored May 25, 2022
1 parent 4487008 commit 273e3f2
Showing 37 changed files with 474 additions and 207 deletions.
15 changes: 4 additions & 11 deletions gym/core.py
@@ -1,7 +1,6 @@
"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
from __future__ import annotations

from abc import abstractmethod
from typing import Generic, Optional, SupportsFloat, TypeVar, Union

from gym import spaces
@@ -63,15 +62,14 @@ def np_random(self) -> RandomNumberGenerator:
def np_random(self, value: RandomNumberGenerator):
self._np_random = value

@abstractmethod
def step(self, action: ActType) -> tuple[ObsType, float, bool, dict]:
"""Run one timestep of the environment's dynamics.
When the end of an episode is reached, you are responsible for calling :meth:`reset` to reset this environment's state.
Accepts an action and returns a tuple `(observation, reward, done, info)`.
Args:
action (object): an action provided by the agent
action (ActType): an action provided by the agent
Returns:
observation (object): this will be an element of the environment's :attr:`observation_space`.
Expand All @@ -88,7 +86,6 @@ def step(self, action: ActType) -> tuple[ObsType, float, bool, dict]:
"""
raise NotImplementedError
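As the docstring above specifies, `step` returns a 4-tuple `(observation, reward, done, info)` at this point in the API's history. A minimal interaction loop against that contract might look like the following sketch (`CartPole-v1` is just an illustrative environment id):

```python
import gym

env = gym.make("CartPole-v1")  # any registered environment works here
obs = env.reset(seed=0)  # seeding through reset, per the reset() docstring below
done = False
while not done:
    action = env.action_space.sample()  # random policy, for illustration only
    obs, reward, done, info = env.step(action)
env.close()
```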

@abstractmethod
def reset(
self,
*,
@@ -129,7 +126,6 @@ def reset(
if seed is not None:
self._np_random, seed = seeding.np_random(seed)

@abstractmethod
def render(self, mode="human"):
"""Renders the environment.
Expand All @@ -152,6 +148,7 @@ def render(self, mode="human"):
in implementations to use the functionality of this method.
Example:
>>> import numpy as np
>>> class MyEnv(Env):
... metadata = {'render_modes': ['human', 'rgb_array']}
...
Expand All @@ -161,7 +158,7 @@ def render(self, mode="human"):
... elif mode == 'human':
... ... # pop up a window and render
... else:
... super(MyEnv, self).render(mode=mode) # just raise an exception
... super().render(mode=mode) # just raise an exception
Args:
mode: the mode to render with, valid modes are `env.metadata["render_modes"]`
@@ -208,7 +205,7 @@ def unwrapped(self) -> Env:
"""Returns the base non-wrapped environment.
Returns:
gym.Env: The base non-wrapped gym.Env instance
Env: The base non-wrapped gym.Env instance
"""
return self

@@ -389,7 +386,6 @@ def step(self, action):
observation, reward, done, info = self.env.step(action)
return self.observation(observation), reward, done, info

@abstractmethod
def observation(self, observation):
"""Returns a modified observation."""
raise NotImplementedError
@@ -424,7 +420,6 @@ def step(self, action):
observation, reward, done, info = self.env.step(action)
return observation, self.reward(reward), done, info

@abstractmethod
def reward(self, reward):
"""Returns a modified ``reward``."""
raise NotImplementedError
@@ -466,12 +461,10 @@ def step(self, action):
"""Runs the environment :meth:`env.step` using the modified ``action`` from :meth:`self.action`."""
return self.env.step(self.action(action))

@abstractmethod
def action(self, action):
"""Returns a modified action before :meth:`env.step` is called."""
raise NotImplementedError

@abstractmethod
def reverse_action(self, action):
"""Returns a reversed ``action``."""
raise NotImplementedError
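Each of the wrapper classes above exposes a single hook for subclasses to override: `observation`, `reward`, or `action`/`reverse_action`. As a hedged sketch of the intended subclassing pattern (the `ClipObservation` name and clipping behaviour are illustrative, not part of this diff):

```python
import numpy as np

import gym


class ClipObservation(gym.ObservationWrapper):
    """Hypothetical wrapper that clips every observation to [-1, 1]."""

    def observation(self, observation):
        # Called automatically from ObservationWrapper.step and .reset.
        return np.clip(observation, -1.0, 1.0)
```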
19 changes: 10 additions & 9 deletions gym/envs/box2d/lunar_lander.py
@@ -696,15 +696,16 @@ def heuristic(env, s):
Args:
env: The environment
s (list): The state. Attributes:
s[0] is the horizontal coordinate
s[1] is the vertical coordinate
s[2] is the horizontal speed
s[3] is the vertical speed
s[4] is the angle
s[5] is the angular speed
s[6] 1 if first leg has contact, else 0
s[7] 1 if second leg has contact, else 0
returns:
s[0] is the horizontal coordinate
s[1] is the vertical coordinate
s[2] is the horizontal speed
s[3] is the vertical speed
s[4] is the angle
s[5] is the angular speed
s[6] 1 if first leg has contact, else 0
s[7] 1 if second leg has contact, else 0
Returns:
a: The action computed by the heuristic, to be fed into the step function defined above to determine the next state and reward.
"""

45 changes: 26 additions & 19 deletions gym/envs/classic_control/acrobot.py
@@ -44,8 +44,8 @@ class AcrobotEnv(core.Env):
The action is discrete, deterministic, and represents the torque applied on the actuated
joint between the two links.
| Num | Action | Unit |
|----|-------------------------------------------|---------------|
| Num | Action | Unit |
|-----|---------------------------------------|--------------|
| 0 | apply -1 torque to the actuated joint | torque (N m) |
| 1 | apply 0 torque to the actuated joint | torque (N m) |
| 2 | apply 1 torque to the actuated joint | torque (N m) |
Expand All @@ -55,27 +55,29 @@ class AcrobotEnv(core.Env):
The observation is a `ndarray` with shape `(6,)` that provides information about the
two rotational joint angles as well as their angular velocities:
| Num | Observation | Min | Max |
|-----|-----------------------|----------------------|--------------------|
| 0 | Cosine of `theta1` | -1 | 1 |
| 1 | Sine of `theta1` | -1 | 1 |
| 2 | Cosine of `theta2` | -1 | 1 |
| 3 | Sine of `theta2` | -1 | 1 |
| 4 | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
| 5 | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
| Num | Observation | Min | Max |
|-----|------------------------------|---------------------|-------------------|
| 0 | Cosine of `theta1` | -1 | 1 |
| 1 | Sine of `theta1` | -1 | 1 |
| 2 | Cosine of `theta2` | -1 | 1 |
| 3 | Sine of `theta2` | -1 | 1 |
| 4 | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
| 5 | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
where
- `theta1` is the angle of the first joint, where an angle of 0 indicates the first link is pointing directly
downwards.
- `theta2` is ***relative to the angle of the first link.*** An angle of 0 corresponds to having the same angle between the
two links.
- `theta2` is ***relative to the angle of the first link.***
An angle of 0 corresponds to having the same angle between the two links.
The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.
### Rewards
The goal is to have the free end reach a designated target height in as few steps as possible, and as such all steps that do not reach the goal incur a reward of -1. Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
The goal is to have the free end reach a designated target height in as few steps as possible,
and as such all steps that do not reach the goal incur a reward of -1.
Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
### Starting State
Expand All @@ -98,7 +100,8 @@ class AcrobotEnv(core.Env):
```
By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html). However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
in the original [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).
```
Expand All @@ -125,7 +128,9 @@ class AcrobotEnv(core.Env):
- v0: Initial versions release (1.0.0) (removed from gym for v1)
### References
- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding. In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8). MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
- Sutton, R. S., Barto, A. G. (2018). Reinforcement Learning: An Introduction. The MIT Press.
"""

@@ -380,6 +385,8 @@ def bound(x, m, M=None):
Args:
x: scalar
m: The lower bound
M: The upper bound
Returns:
x: scalar, bound between min (m) and Max (M)
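Reading the docstring above, `bound` clamps a scalar between `m` and `M`, treating `m` as a `(min, max)` pair when `M` is omitted; a minimal sketch consistent with that contract (an assumption, not necessarily the module's exact implementation):

```python
def bound(x, m, M=None):
    # When M is None, m is assumed to hold the (min, max) pair.
    if M is None:
        m, M = m
    return min(max(x, m), M)
```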
Expand All @@ -398,15 +405,15 @@ def rk4(derivs, y0, t):
yourself stranded on a system w/o scipy. Otherwise use
:func:`scipy.integrate`.
Example:
Example for 2D system:
>>> ### 2D system
>>> def derivs(x):
... d1 = x[0] + 2*x[1]
... d2 = -3*x[0] + 4*x[1]
... return (d1, d2)
... return d1, d2
>>> dt = 0.0005
>>> t = arange(0.0, 2.0, dt)
>>> t = np.arange(0.0, 2.0, dt)
>>> y0 = (1,2)
>>> yout = rk4(derivs, y0, t)
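Expanding the doctest above into something runnable, here is a self-contained fixed-step RK4 integrator in the same spirit (a sketch; the module's own `rk4` may differ in detail):

```python
import numpy as np


def rk4(derivs, y0, t):
    """Integrate dy/dt = derivs(y) over the time grid t, starting from y0."""
    yout = np.zeros((len(t), len(y0)))
    yout[0] = y0
    for i in range(len(t) - 1):
        h = t[i + 1] - t[i]
        y = yout[i]
        k1 = np.asarray(derivs(y))
        k2 = np.asarray(derivs(y + h / 2 * k1))
        k3 = np.asarray(derivs(y + h / 2 * k2))
        k4 = np.asarray(derivs(y + h * k3))
        yout[i + 1] = y + h / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
    return yout


def derivs(x):
    d1 = x[0] + 2 * x[1]
    d2 = -3 * x[0] + 4 * x[1]
    return d1, d2


t = np.arange(0.0, 2.0, 0.0005)
yout = rk4(derivs, (1, 2), t)  # shape (len(t), 2) trajectory
```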
39 changes: 23 additions & 16 deletions gym/envs/classic_control/cartpole.py
@@ -17,40 +17,47 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
"""
### Description
This environment corresponds to the version of the cart-pole problem
described by Barto, Sutton, and Anderson in ["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
A pole is attached by an un-actuated joint to a cart, which moves along a
frictionless track. The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left and right direction on the cart.
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
in the left and right direction on the cart.
### Action Space
The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction of the fixed force the cart is pushed with.
The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
of the fixed force the cart is pushed with.
| Num | Action |
|-----|------------------------|
| 0 | Push cart to the left |
| 1 | Push cart to the right |
**Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it
**Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle
the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it
### Observation Space
The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
| Num | Observation | Min | Max |
|-----|-----------------------|----------------------|--------------------|
| 0 | Cart Position | -4.8 | 4.8 |
| 1 | Cart Velocity | -Inf | Inf |
| 2 | Pole Angle | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3 | Pole Angular Velocity | -Inf | Inf |
| Num | Observation | Min | Max |
|-----|-----------------------|---------------------|-------------------|
| 0 | Cart Position | -4.8 | 4.8 |
| 1 | Cart Velocity | -Inf | Inf |
| 2 | Pole Angle | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3 | Pole Angular Velocity | -Inf | Inf |
**Note:** While the ranges above denote the possible values for observation space of each element, it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
- The cart x-position (index 0) can be take values between `(-4.8, 4.8)`, but the episode terminates if the cart leaves the `(-2.4, 2.4)` range.
- The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)
**Note:** While the ranges above denote the possible values for observation space of each element,
it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
- The cart x-position (index 0) can take values between `(-4.8, 4.8)`, but the episode terminates
if the cart leaves the `(-2.4, 2.4)` range.
- The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates
if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**); see the sketch after this list.
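A hedged sketch of that termination check, using only the thresholds quoted above (the names are illustrative, not taken from the implementation):

```python
import math

X_THRESHOLD = 2.4  # cart position limit, in metres
THETA_THRESHOLD = 12 * 2 * math.pi / 360  # ~0.2095 rad, i.e. 12 degrees


def is_terminated(obs):
    x, _, theta, _ = obs  # (position, velocity, angle, angular velocity)
    return abs(x) > X_THRESHOLD or abs(theta) > THETA_THRESHOLD
```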
### Rewards
Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken, including the termination step, is allotted. The threshold for rewards is 475 for v1.
Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken,
including the termination step, is allotted. The threshold for rewards is 475 for v1.
### Starting State
22 changes: 14 additions & 8 deletions gym/envs/classic_control/continuous_mountain_car.py
@@ -49,14 +49,15 @@ class Continuous_MountainCarEnv(gym.Env):
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Unit |
|-----|-------------------------------------------------------------|--------------------|--------|------|
| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
| 1 | velocity of the car | -Inf | Inf | position (m) |
| Num | Observation | Min | Max | Unit |
|-----|--------------------------------------|------|-----|--------------|
| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
| 1 | velocity of the car | -Inf | Inf | velocity (m/s) |
### Action Space
The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car. The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car.
The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
### Transition Dynamics:
Expand All @@ -66,15 +67,20 @@ class Continuous_MountainCarEnv(gym.Env):
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015. The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall. The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall.
The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
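Putting the documented update rule into code, the following sketch performs one transition; the `-0.0025 * cos(3 * position)` gravity term follows the classic mountain-car formulation and is an assumption here, since that line of the docstring is elided from this diff:

```python
import math

POWER = 0.0015  # constant from the docstring


def transition(position, velocity, action):
    force = min(max(action, -1.0), 1.0)  # action clipped to [-1, 1]
    velocity += force * POWER - 0.0025 * math.cos(3 * position)  # gravity term assumed
    velocity = min(max(velocity, -0.07), 0.07)
    new_position = min(max(position + velocity, -1.2), 0.6)
    if new_position <= -1.2 and velocity < 0:
        velocity = 0.0  # inelastic collision with the wall
    return new_position, velocity
```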
### Reward
A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100 is added to the negative reward for that timestep.
A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100
is added to the negative reward for that timestep.
### Starting State
The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`. The starting velocity of the car is always assigned to 0.
The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
The starting velocity of the car is always assigned to 0.
### Episode Termination