Merge pull request #37 from Farama-Foundation/doc/missing-docstrings

Markdown Documentation for Undocumented Environments

ffelten authored Mar 28, 2024
2 parents 97c3a76 + 1461de1 commit e6f186d

Showing 9 changed files with 222 additions and 60 deletions.
17 changes: 7 additions & 10 deletions momaland/envs/beach/beach.py
@@ -67,10 +67,7 @@ class MOBeachDomain(MOParallelEnv):
`[a_type, section_id, section_capacity, section_consumption, %_of_a_of_current_type]`
## Action Space
The action space is a Discrete space, where:
- moving left is -1
- moving right is +1
- staying is 0
The action space is a Discrete space [0, 1, 2], corresponding to moving left, moving right, staying in place.
## Reward Space
The reward space is a 2D vector containing rewards for two different schemes ('local' or 'global') for:
@@ -92,12 +89,12 @@ class MOBeachDomain(MOParallelEnv):
The problem is not truncated. It has a maximum number of timesteps.
## Arguments
- 'num_timesteps (int)': number of timesteps in the domain. Default: 100
- 'num_timesteps (int)': number of timesteps in the domain. Default: 1
- 'num_agents (int)': number of agents in the domain. Default: 100
- 'reward_scheme (str)': the reward scheme to use ('local', or 'global'). Default: local
- 'sections (int)': number of beach sections in the domain. Default: 6
- 'capacity (int)': capacity of each beach section. Default: 10
- 'type_distribution (tuple)': the distribution of agent types in the domain. Default: 2 types equally distributed (0.5, 0.5).
- 'capacity (int)': capacity of each beach section. Default: 7
- 'type_distribution (tuple)': the distribution of agent types in the domain. Default: 2 types equally distributed (0.3, 0.7).
- 'position_distribution (tuple)': the initial distribution of agents in the domain. Default: uniform over all sections (None).
- 'render_mode (str)': render mode. Default: None
"""
@@ -106,12 +103,12 @@ class MOBeachDomain(MOParallelEnv):

def __init__(
self,
num_timesteps=10,
num_timesteps=1,
num_agents=100,
reward_scheme="local",
sections=6,
capacity=10,
type_distribution=(0.5, 0.5),
capacity=7,
type_distribution=(0.3, 0.7),
position_distribution=None,
render_mode=None,
):
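As a usage illustration of the arguments and action encoding documented above, here is a minimal sketch. It assumes `MOBeachDomain` can be imported from `momaland.envs.beach.beach` (the file changed here) and follows the PettingZoo parallel API (`reset`/`step` with per-agent dictionaries); the keyword values are illustrative picks drawn from the argument list above.

```python
from momaland.envs.beach.beach import MOBeachDomain

# Illustrative values taken from the Arguments section above.
env = MOBeachDomain(
    num_timesteps=1,
    num_agents=100,
    reward_scheme="local",
    sections=6,
    capacity=7,
    type_distribution=(0.3, 0.7),
)

observations, infos = env.reset(seed=42)
# Discrete(3) actions, as documented: 0 = move left, 1 = move right, 2 = stay in place.
actions = {agent: env.action_space(agent).sample() for agent in env.agents}
observations, rewards, terminations, truncations, infos = env.step(actions)
# rewards[agent] is the 2-dimensional vector described in the Reward Space section.
print(rewards[env.possible_agents[0]])
```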
17 changes: 11 additions & 6 deletions momaland/envs/breakthrough/breakthrough.py
@@ -69,27 +69,23 @@ class MOBreakthrough(MOAECEnv):
## Observation Space
The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described
below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section below.
The main observation space is 2 planes of a board_height * board_width grid (a board_height * board_width * 2 tensor).
Each plane represents a specific agent's pieces, and each location in the grid represents the placement of the
corresponding agent's piece. 1 indicates that the agent has a piece placed in the given location, and 0 indicates they
do not have a piece in that location (meaning that either the cell is empty, or the other agent has a piece in that
location).
### Legal Actions Mask
## Legal Actions Mask
The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation.
The `action_mask` is a binary vector where each index of the vector represents whether the represented action is legal
or not; the action encoding is described in the Action Space section below.
The `action_mask` will be all zeros for any agent except the one whose turn it is. Taking an illegal action ends the
game with a reward of -1 for the illegally moving agent and a reward of 0 for all other agents. #TODO this isn't happening anymore because of missing TerminateIllegalWrapper
The `action_mask` will be all zeros for any agent except the one whose turn it is.
## Action Space
The action space is the set of integers from 0 to board_width*board_height*3 (exclusive). If a piece at coordinates
(x,y) is moved, this is encoded as the integer x * 3 * board_height + y * 3 + z where z == 0 for left diagonal, 1 for
straight, and 2 for right diagonal move.
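To make the encoding above concrete, here is a small sketch with hypothetical helper names (`encode_move`/`decode_move`, not part of the environment) that map between board coordinates and the flat action index.

```python
def encode_move(x: int, y: int, z: int, board_height: int) -> int:
    """Flatten a move: z is 0 (left diagonal), 1 (straight), or 2 (right diagonal)."""
    return x * 3 * board_height + y * 3 + z


def decode_move(action: int, board_height: int) -> tuple[int, int, int]:
    """Invert the encoding back to (x, y, z)."""
    x, rest = divmod(action, 3 * board_height)
    y, z = divmod(rest, 3)
    return x, y, z


# Example on an 8x8 board: moving the piece at (2, 5) straight ahead.
assert encode_move(2, 5, 1, board_height=8) == 2 * 24 + 5 * 3 + 1 == 64
assert decode_move(64, board_height=8) == (2, 5, 1)
```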
## Rewards
Dimension 0: If an agent moves one of their pieces to the opponent's home row, they will be rewarded 1 point. At the
same time, the opponent agent will be awarded -1 point. There are no draws in Breakthrough.
@@ -99,6 +95,15 @@ class MOBreakthrough(MOAECEnv):
Dimension 3: (optional) The negative number of pieces (divided by the max number of pieces)
an agent has lost to the opponent.
## Starting State
The starting board is empty except for the first two rows that are filled with pieces of player 0, and the last two rows that are filled with pieces of player 1.
## Arguments
- 'board_width': The width of the board (from 3 to 20)
- 'board_height': The height of the board (from 5 to 20)
- 'num_objectives': The number of objectives (from 1 to 4)
- 'render_mode': The render mode.
## Version History
"""

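A minimal interaction sketch for the class documented above, assuming `MOBreakthrough` follows the PettingZoo AEC API (`reset`, `agent_iter`, `last`, `step`) and that legal moves are read from the `action_mask` described in the Legal Actions Mask section. The constructor values are illustrative picks within the documented ranges.

```python
import numpy as np

from momaland.envs.breakthrough.breakthrough import MOBreakthrough

env = MOBreakthrough(board_width=8, board_height=8, num_objectives=4)
env.reset(seed=42)

for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    if termination or truncation:
        action = None
    else:
        # Pick a random legal move from the action mask.
        legal = np.flatnonzero(observation["action_mask"])
        action = int(np.random.choice(legal))
    env.step(action)
env.close()
```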
42 changes: 36 additions & 6 deletions momaland/envs/congestion/congestion.py
@@ -50,13 +50,43 @@ def raw_env(**kwargs):


class MOCongestion(MOParallelEnv):
"""Environment for MO-Congestion problem.
"""A `Parallel` environment where drivers learn to travel from a source to a destination while avoiding congestion.
The init method takes in environment arguments and should define the following attributes:
- possible_agents
- action_spaces
- observation_spaces
These attributes should not be changed after initialization.
Multi-objective version of Braess' Paradox where drivers have two objectives: travel time and monetary cost.
The environment is a road network and the agents are the drivers that need to travel from an origin to a destination point.
## Observation Space
This environment is stateless, so the observation is a constant 0 (a Discrete space of size 1).
## Action Space
The action space is a discrete space representing the possible routes that the agent can take.
The number of routes is different for each agent, as it depends on the number of possible routes for the origin-destination (OD) pair of the agent.
Selecting an action corresponds to choosing a route.
## Reward Space
The reward space is a 2D vector containing rewards for:
- Minimizing travel time (latency).
- Minimizing monetary cost.
## Starting State
The environment is stateless, so there is no starting state.
## Episode Termination
The environment is stateless, so there are no episodes. Each "episode" is therefore terminated after each timestep.
## Episode Truncation
Episodes are not truncated, as they are terminated after each timestep.
## Arguments
- `render_mode (str, optional)`: The mode to display the rendering of the environment. Can be human or None.
- `problem_name (str, optional)`: The name of the road network that will be used.
- `num_agents (int, optional)`: The number of drivers in the network.
- `toll_mode (str, optional)`: The tolling mode that is used; tolls are either placed randomly ("random") or using marginal cost tolling ("mct").
- `random_toll_percentage (float, optional)`: In the case of random tolling the percentage of roads that will be taxed.
- `num_timesteps (int, optional)`: The number of timesteps (stateless, therefore always 1 timestep).
## Credits
The code was adapted from [codebase of "Toll-Based Learning for Minimising Congestion under Heterogeneous Preferences"](https://github.com/goramos/marl-route-choice).
"""

def __init__(
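Below is a minimal sketch of one (stateless) step of the environment documented above, assuming `MOCongestion` can be imported from the changed file, that all constructor arguments have defaults (they are documented as optional), and that the environment follows the PettingZoo parallel API.

```python
from momaland.envs.congestion.congestion import MOCongestion

# All arguments are documented as optional, so this sketch relies on their defaults.
env = MOCongestion()
observations, infos = env.reset(seed=0)

# Each driver chooses one of its available routes (an agent-specific Discrete space).
actions = {agent: env.action_space(agent).sample() for agent in env.agents}
observations, rewards, terminations, truncations, infos = env.step(actions)

# rewards[agent] is a 2D vector over the travel time and monetary cost objectives,
# and the stateless "episode" terminates after this single step.
print(next(iter(rewards.values())), all(terminations.values()))
```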
15 changes: 11 additions & 4 deletions momaland/envs/connect4/connect4.py
@@ -84,7 +84,6 @@ class MOConnect4(MOAECEnv, EzPickle):
## Observation Space
The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described
below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section below.
The main observation space is 2 planes of a board_height * board_width grid (a board_height * board_width * 2 tensor).
Each plane represents a specific agent's tokens, and each location in the grid represents the placement of the
corresponding agent's token. 1 indicates that the agent has a token placed in the given location, and 0 indicates they
@@ -95,9 +94,7 @@ class MOConnect4(MOAECEnv, EzPickle):
The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation.
The `action_mask` is a binary vector where each index of the vector represents whether the represented action is legal
or not; the action encoding is described in the Action Space section below.
The `action_mask` will be all zeros for any agent except the one whose turn it is. Taking an illegal action ends the
game with a reward of -1 for the illegally moving agent and a reward of 0 for all other agents. #TODO this isn't happening anymore because of missing TerminateIllegalWrapper
The `action_mask` will be all zeros for any agent except the one whose turn it is.
## Action Space
The action space is the set of integers from 0 to board_width (exclusive), where the number represents which column
@@ -111,6 +108,16 @@ class MOConnect4(MOAECEnv, EzPickle):
column X, they will be rewarded 1 point in reward dimension 2+X. The opponent agent will be rewarded -1 point. If the
column has an equal number of tokens from both players, both players are rewarded 0.
## Starting State
The game starts with an empty board.
## Arguments
- 'render_mode': The mode to render with. Can be 'human' or 'rgb_array'.
- 'screen_scaling': The factor by which to scale the screen.
- 'board_width': The width of the board (from 4 to 20)
- 'board_height': The height of the board (from 4 to 20)
- 'column_objectives': Whether to use column objectives or not (without them, there are 2 objectives; with them, there are 2+board_width objectives; see the sketch below)
## Version History
"""

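A sketch illustrating how `column_objectives` changes the reward dimensionality as described in the argument list above; note that the `reward_space(agent)` accessor used here is an assumption about the MOMAland API and is not shown in this diff.

```python
from momaland.envs.connect4.connect4 import MOConnect4

# Standard 7x6 board, within the documented 4-20 ranges.
env_plain = MOConnect4(board_width=7, board_height=6, column_objectives=False)  # 2 objectives
env_cols = MOConnect4(board_width=7, board_height=6, column_objectives=True)    # 2 + 7 = 9 objectives

# Assumed accessor: MOMAland environments typically expose a per-agent vector
# reward space alongside action_space/observation_space.
agent = env_cols.possible_agents[0]
print(env_plain.reward_space(agent).shape, env_cols.reward_space(agent).shape)
```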
48 changes: 43 additions & 5 deletions momaland/envs/gem_mining/gem_mining.py
@@ -52,11 +52,49 @@ def raw_env(**kwargs):
class MOGemMining(MOParallelEnv):
"""Environment for MO-GemMining domain.
The init method takes in environment arguments and should define the following attributes:
- possible_agents
- action_spaces
- observation_spaces
These attributes should not be changed after initialization.
## Observation Space
The observation space is a Box with length equal to the number of agents.
As this is a stateless environment, all agents receive a "0" observation each timestep.
## Action Space
The action space is a discrete set of integers for each agent, and is agent-specific.
Each integer represents the ID of a mine (i.e., local reward function) which is reachable from the village (i.e., agent).
Selecting an action represents sending the workers that live in a given village to the corresponding mine.
## Reward Space
The reward space is a vector containing rewards in each objective (customizable).
Each objective corresponds to a type of gem that can be found at the mines.
The rewards correspond to the total number of gems of each type found at all the mines together at a given timestep.
Please note that as this is a fully cooperative environment all agents receive the same reward vectors.
## Starting State
As this is a stateless environment, the "state" is just a default value. (See Observation Space.)
## Episode Termination
As this is a stateless environment, there isn't really an episode; hence the episode terminates after each timestep.
## Episode Truncation
Each "episode" lasts 1 timestep (due to the bandit setting).
## Arguments
- `num_agents`: number of agents (i.e., villages) in the Gem Mining instance
- `num_objectives`: number of objectives (i.e., gem types); each mine has a probability of generating gems of any type at any timestep
- `min_connectivity`: the minimum number of mines each agent is connected to. Should be greater than or equal to 2.
- `max_connectivity`: the maximum number of mines each agent is connected to. Should be greater than or equal to min_connectivity.
- `min_workers`: the minimum number of workers per village (agent). Should be greater than or equal to 1.
- `max_workers`: the maximum number of workers per village (agent). Should be greater than or equal to min_workers.
- `min_prob`: the minimum (Bernoulli) probability of finding a gem (per type) at a mine, excluding the worker bonus
- `max_prob`: the maximum (Bernoulli) probability of finding a gem (per type) at a mine, excluding the worker bonus
- `trunc_probability`: upper limit to the probability of finding a gem after adding the worker bonus
- `w_bonus`: worker bonus; the probability of finding a gem is multiplied by w_bonus^(w-1), where w is the number of workers at a mine (see the worked example after this docstring)
- `correlated_objectives`: if true, the probability of mining a given type of gem at a mine is negatively correlated with finding a gem of another type, and the (non-bonus) expectation of finding any gem is at most max_prob per mine per timestep.
- `num_timesteps`: number of timesteps (stateless, therefore set to 1 timestep by default)
- `render_mode`: render mode
- `seed`: this environment is generated randomly using the provided seed. Defaults to 42.
## Credits
The code was based on previous code by Diederik Roijers and Eugenio Bargiacchi (in different programming languages), and reimplemented.
"""

metadata = {"render_modes": ["human"], "name": "mogem_mining_v0"}
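A minimal parallel-API sketch for the environment documented above; the keyword values are illustrative (the real defaults live in the constructor, which is collapsed in this diff). As a worked example of the worker bonus: with `w_bonus = 1.03` and `w = 3` workers at a mine, each gem probability is multiplied by 1.03^(3-1) ≈ 1.061.

```python
from momaland.envs.gem_mining.gem_mining import MOGemMining

# Illustrative values; the actual defaults are defined in the (collapsed) constructor.
env = MOGemMining(num_agents=4, num_objectives=3, seed=42)

observations, infos = env.reset(seed=42)
# Every village sends its workers to one of the mines it is connected to.
actions = {agent: env.action_space(agent).sample() for agent in env.agents}
observations, rewards, terminations, truncations, infos = env.step(actions)

# Fully cooperative: every agent receives the same vector of gems found per type.
vectors = list(rewards.values())
assert all((v == vectors[0]).all() for v in vectors)
```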
51 changes: 46 additions & 5 deletions momaland/envs/ingenious/ingenious.py
@@ -42,19 +42,60 @@ class MOIngenious(MOAECEnv):


class MOIngenious(MOAECEnv):
"""Environment for the multi-objective Ingenious game."""
"""Ingenious board game.
Ingenious is a turn-based board game for multiple players. 2-4 players can play (default is 2), on a hexagonal
board with an edge length of 3-10 (default is 6). Each player has 2-6 (default is 6) tiles with colour symbols on
their rack (hand). In sequential order, players play one of their tiles onto the hexagonal board, with the goal
of establishing lines of matching symbols emerging from the placed tile. This allows the players to increase
their score in the respective colors, each color representing one of 2-6 (default is 6) objectives. New tiles are
randomly drawn, and the racks of other players with their currently available tiles are not observable (in the
default rules). When the board is filled, the original game rules define the winner as the player who has the
highest score in their lowest-scoring colour. This implementation exposes the colour scores themselves as
different objectives, allowing arbitrary utility functions to be defined over them.
## Observation Space
The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described
below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section below.
The main observation space is a dictionary containing the `'board'`, the `'tiles'`, and the `'scores'`. TODO describe. why do we return the scores of the player?
## Legal Actions Mask
The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation.
The `action_mask` is a binary vector where each index of the vector represents whether the represented action is legal
or not; the action encoding is described in the Action Space section below.
The `action_mask` will be all zeros for any agent except the one whose turn it is. TODO is this true?
## Action Space
The action space is the set of integers from 0 to TODO describe action encoding here, with reference to web resource for hex encoding
## Rewards
The reward dimensions correspond to the 2-6 (default is 6) different colors that the players can score points for.
## Starting State
The game starts with an empty board, and each player with 2-6 (default is 6) randomly drawn tiles in their hand.
## Arguments
- 'num_players' (int): The number of players in the environment. Default: 2
- 'init_draw' (int): The number of tiles each player draws at the beginning of the game. Default: 6
- 'num_colors' (int): The number of colors in the game. Default: 6
- 'board_size' (int): The size of the board. Default: 6
- 'limitation_score' (int): Maximum score for any color. Default: 18
- 'render_mode' (str): The rendering mode. Default: None
## Version History
"""

metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False}

def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limitation_score=18, render_mode=None):
def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18, render_mode=None):
"""Initializes the multi-objective Ingenious game.
Args:
num_players (int): The number of players in the environment. Default: 2
init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6
num_colors (int): The number of colors in the game. Default: 4
board_size (int): The size of the board. Default: 8
limitation_score(int): Limitation to refresh the score board for any color. Default: 20
num_colors (int): The number of colors in the game. Default: 6
board_size (int): The size of the board. Default: 6
limitation_score (int): Maximum score for any color. Default: 18
render_mode (str): The rendering mode. Default: None
"""
self.board_size = board_size
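A short sketch for the class documented above, using the documented default arguments and assuming the standard PettingZoo AEC calls; a random legal tile placement for the first player is drawn from the `action_mask` described in the Legal Actions Mask section.

```python
import numpy as np

from momaland.envs.ingenious.ingenious import MOIngenious

# Documented defaults: 2 players, 6 tiles in hand, 6 colors/objectives, board size 6.
env = MOIngenious(num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18)
env.reset(seed=42)

agent = env.agent_selection
observation, _, termination, truncation, _ = env.last()
# Play one random legal tile placement for the first player.
action = int(np.random.choice(np.flatnonzero(observation["action_mask"])))
env.step(action)

# The reward vector has one entry per color objective (6 by default).
print(env.rewards[agent])
```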
12 changes: 6 additions & 6 deletions momaland/envs/ingenious/ingenious_base.py
@@ -100,15 +100,15 @@ def generate_board(board_size):
class IngeniousBase:
"""Base class for Ingenious environment."""

def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limitation_score=18):
def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18):
"""Initialize the Ingenious environment.
Args:
num_players (int): Number of players in the game.
init_draw (int): Number of tiles to draw at the beginning of the game.
num_colors (int): Number of colors in the game.
board_size (int): Size of the board.
limitation_score(int): Limitation to refresh the score board for any color. Default: 20
num_players (int): The number of players in the environment. Default: 2
init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6
num_colors (int): The number of colors in the game. Default: 6
board_size (int): The size of the board. Default: 6
limitation_score (int): Maximum score for any color. Default: 18
"""
assert 2 <= num_players <= 5, "Number of players must be between 2 and 5."
assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6."