diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4899d2b0..b38fd2834 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,6 +65,6 @@ repos: language: node pass_filenames: false types: [python] - additional_dependencies: ["pyright"] + additional_dependencies: ["pyright@1.1.347"] args: - --project=pyproject.toml diff --git a/README.md b/README.md index 9ebd08f57..9604e2470 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ -[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8127025.svg)](https://doi.org/10.5281/zenodo.8127025) - +[![Python](https://img.shields.io/pypi/pyversions/gymnasium.svg)](https://badge.fury.io/py/gymnasium) +[![PyPI](https://badge.fury.io/py/gymnasium.svg)](https://badge.fury.io/py/gymnasium) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8127025.svg)](https://doi.org/10.5281/zenodo.8127025) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
diff --git a/docs/_scripts/atari-docs.json b/docs/_scripts/atari-docs.json
index 16fece669..63962bba8 100644
--- a/docs/_scripts/atari-docs.json
+++ b/docs/_scripts/atari-docs.json
@@ -12,32 +12,32 @@
"alien": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=815",
"env_description": "You are stuck in a maze-like space ship with three aliens. You goal is to destroy their eggs that are scattered all over the ship while simultaneously avoiding the aliens (they are trying to kill you). You have a flamethrower that can help you turn them away in tricky situations. Moreover, you can occasionally collect a power-up (pulsar) that gives you the temporary ability to kill aliens.",
- "reward_description": "### Rewards\n\nYou score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught\nby an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a\ntable of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).\n"
+ "reward_description": "## Rewards\nYou score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a table of scores corresponding to the different achievements, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815)."
},
"amidar": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=817",
"env_description": "This game is similar to Pac-Man: You are trying to visit all places on a 2-dimensional grid while simultaneously avoiding your enemies. You can turn the tables at one point in the game: Your enemies turn into chickens and you can catch them.",
- "reward_description": "### Rewards\n\nYou score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).\n"
+ "reward_description": "## Rewards\nYou score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817)."
},
"assault": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=827",
- "env_description": "You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones.You must destroy these enemies and dodge their attacks.",
+ "env_description": "You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones. You must destroy these enemies and dodge their attacks.",
"reward_description": ""
},
"asterix": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=3325",
"env_description": "You are Asterix and can move horizontally (continuously) and vertically (discretely). Objects move horizontally across the screen: lyres and other (more useful) objects. Your goal is to guideAsterix in such a way as to avoid lyres and collect as many other objects as possible. You score points by collecting objects and lose a life whenever you collect a lyre. You have three lives available at the beginning. If you score sufficiently many points, you will be awarded additional points.",
- "reward_description": "### Rewards\n\nA table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).\n"
+ "reward_description": "## Rewards\nA table of scores awarded for collecting the different objects is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325)."
},
"asteroids": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=828",
"env_description": "This is a well-known arcade game: You control a spaceship in an asteroid field and must break up asteroids by shooting them. Once all asteroids are destroyed, you enter a new level and new asteroids will appear. You will occasionally be attacked by a flying saucer.",
- "reward_description": "### Rewards\n\nYou score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score\nfor destroying it.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL).\n"
+ "reward_description": "## Rewards\nYou score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score for destroying it. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL)."
},
"atlantis": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=835",
- "env_description": "Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts.You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought of a wave of enemies and scored a sufficient number of points.",
- "reward_description": "### Rewards\n\nYou score points for destroying enemies, keeping installations protected during attack waves. You score more points\nif you manage to destroy your enemies with one of the outer defense posts.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).\n"
+ "env_description": "Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts. You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought off a wave of enemies and scored a sufficient number of points.",
+ "reward_description": "## Rewards\nYou score points for destroying enemies, keeping installations protected during attack waves. You score more points if you manage to destroy your enemies with one of the outer defense posts. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835)."
},
"atlantis2": {
"atariage_url": "",
@@ -51,28 +51,28 @@
},
"bank_heist": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=1008",
- "env_description": "You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city.At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police,or run over the dynamite you have previously dropped.",
- "reward_description": "### Rewards\n\nYou score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city,\nyou will score extra points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).\n"
+ "env_description": "You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city. At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police, or run over the dynamite you have previously dropped.",
+ "reward_description": "## Rewards\nYou score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, you will score extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008)."
},
"basic_math": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=14",
- "env_description": "You must solve basic math problems using a joystick\nto scroll to the correct numeric answer.",
+ "env_description": "You must solve basic math problems using a joystick to scroll to the correct numeric answer.",
"reward_description": ""
},
"battle_zone": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=859",
"env_description": "You control a tank and must destroy enemy vehicles. This game is played in a first-person perspective and creates a 3D illusion. A radar screen shows enemies around you. You start with 5 lives and gain up to 2 extra lives if you reach a sufficient score.",
- "reward_description": "### Rewards\n\nYou receive points for destroying enemies.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL).\n"
+ "reward_description": "## Rewards\nYou receive points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL)."
},
"beam_rider": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=860",
"env_description": "You control a space-ship that travels forward at a constant speed. You can only steer it sideways between discrete positions. Your goal is to destroy enemy ships, avoid their attacks and dodge space debris.",
- "reward_description": "### Rewards\n\nYou score points for destroying enemies.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL).\n"
+ "reward_description": "## Rewards\nYou score points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL)."
},
"berzerk": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=866",
- "env_description": "You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode.You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.",
- "reward_description": "### Rewards\n\nYou score points for destroying robots.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL).\n"
+ "env_description": "You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode. You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.",
+ "reward_description": "## Rewards\nYou score points for destroying robots. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL)."
},
"blackjack": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=34",
@@ -82,22 +82,22 @@
"bowling": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=879",
"env_description": "Your goal is to score as many points as possible in the game of Bowling. A game consists of 10 frames and you have two tries per frame. Knocking down all pins on the first try is called a \"strike\". Knocking down all pins on the second roll is called a \"spar\". Otherwise, the frame is called \"open\".",
- "reward_description": "### Rewards\n\nYou receive points for knocking down pins. The exact score depends on whether you manage a \"strike\", \"spare\" or \"open\"\nframe. Moreover, the points you score for one frame may depend on following frames.\nYou can score up to 300 points in one game (if you manage to do 12 strikes).\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879).\n"
+ "reward_description": "## Rewards\nYou receive points for knocking down pins. The exact score depends on whether you manage a \"strike\", \"spare\" or \"open\" frame. Moreover, the points you score for one frame may depend on following frames. You can score up to 300 points in one game (if you manage to do 12 strikes). For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879)."
},
"boxing": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=882",
"env_description": "You fight an opponent in a boxing ring. You score points for hitting the opponent. If you score 100 points, your opponent is knocked out.",
- "reward_description": "### Rewards\n\nYou score points by landing punches.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).\n"
+ "reward_description": "## Rewards\nYou score points by landing punches. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882)."
},
"breakout": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=889",
"env_description": "Another famous Atari game. The dynamics are similar to pong: You move a paddle and hit the ball in a brick wall at the top of the screen. Your goal is to destroy the brick wall. You can try to break through the wall and let the ball wreak havoc on the other side, all on its own! You have five lives.",
- "reward_description": "### Rewards\n\nYou score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).\n"
+ "reward_description": "## Rewards\nYou score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889)."
},
"carnival": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=908",
"env_description": "This is a \"shoot 'em up\" game. Targets move horizontally across the screen and you must shoot them. You are in control of a gun that can be moved horizontally. The supply of ammunition is limited and chickens may steal some bullets from you if you don't hit them in time.",
- "reward_description": "### Rewards\n\nYou score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign.\nYou will score extra points if it shows a plus sign!\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).\n"
+ "reward_description": "## Rewards\nYou score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. You will score extra points if it shows a plus sign! For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908)."
},
"casino": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=70",
@@ -107,17 +107,17 @@
"centipede": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=911",
"env_description": "You are an elf and must use your magic wands to fend off spiders, fleas and centipedes. Your goal is to protect mushrooms in an enchanted forest. If you are bitten by a spider, flea or centipede, you will be temporally paralyzed and you will lose a magic wand. The game ends once you have lost all wands. You may receive additional wands after scoring a sufficient number of points.",
- "reward_description": "### Rewards\n\nYou score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round\n(i.e. after you have lost a wand) for mushrooms that were not destroyed.\nDetailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).\n"
+ "reward_description": "## Rewards\nYou score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round (i.e. after you have lost a wand) for mushrooms that were not destroyed. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911)."
},
"chopper_command": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=921",
- "env_description": "You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft.A mini-map is displayed at the bottom of the screen.",
- "reward_description": "### Rewards\n\nYou score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number\nof trucks that have survived.\nDetailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).\n"
+ "env_description": "You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft. A mini-map is displayed at the bottom of the screen.",
+ "reward_description": "## Rewards\nYou score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number of trucks that have survived. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921)."
},
"crazy_climber": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=113",
"env_description": "You are a climber trying to reach the top of four buildings, while avoiding obstacles like closing windows and falling objects. When you receive damage (windows closing or objects) you will fall and lose one life; you have a total of 5 lives before the end games. At the top of each building, there's a helicopter which you need to catch to get to the next building. The goal is to climb as fast as possible while receiving the least amount of damage.",
- "reward_description": "### Rewards\n\nA table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).\n"
+ "reward_description": "## Rewards\nA table of scores awarded for completing each row of a building is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113)."
},
"crossbow": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=115",
@@ -131,13 +131,13 @@
},
"defender": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=128",
- "env_description": "Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids.You have three lives and three smart bombs. You lose a live when you are shot down by an alien spaceship.Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.",
- "reward_description": "### Rewards\n\nYou receive points for destroying enemies, rescuing abducted humans and keeping humans alive.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128).\n"
+ "env_description": "Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids. You have three lives and three smart bombs. You lose a life when you are shot down by an alien spaceship. Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.",
+ "reward_description": "## Rewards\nYou receive points for destroying enemies, rescuing abducted humans and keeping humans alive. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128)."
},
"demon_attack": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=135",
"env_description": "You are facing waves of demons in the ice planet of Krybor. Points are accumulated by destroying demons. You begin with 3 reserve bunkers, and can increase its number (up to 6) by avoiding enemy attacks. Each attack wave you survive without any hits, grants you a new bunker. Every time an enemy hits you, a bunker is destroyed. When the last bunker falls, the next enemy hit will destroy you and the game ends.",
- "reward_description": "### Rewards\n\nEach enemy you slay gives you points. The amount of points depends on the type of demon and which\nwave you are in. A detailed table of scores is provided on [the AtariAge\npage](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).\n"
+ "reward_description": "## Rewards\nEach enemy you slay gives you points. The amount of points depends on the type of demon and which wave you are in. A detailed table of scores is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135)."
},
"donkey_kong": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=149",
@@ -147,7 +147,7 @@
"double_dunk": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=153",
"env_description": "You are playing a 2v2 game of basketball. At the start of each possession, you select between a set of different plays and then execute them to either score or prevent your rivals from scoring.",
- "reward_description": "### Rewards\n\nScores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending\nfrom where you shoot. After a defensive foul, a successful shot from the foul line gives you 1\npoint.\n"
+ "reward_description": "## Rewards\nScores follow the rules of basketball. You can get either 3 points or 2 points, depending on where you shoot from. After a defensive foul, a successful shot from the foul line gives you 1 point."
},
"earthworld": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=541",
@@ -156,13 +156,13 @@
},
"elevator_action": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=1131",
- "env_description": "You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents.This is an unreleased prototype based on the arcade game.",
- "reward_description": "### Rewards\n\nYou start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each\nsecret document collected (visiting a red door). Each time you get shot you lose one life and the\ngame ends when losing all lives.\n"
+ "env_description": "You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you on each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents. This is an unreleased prototype based on the arcade game.",
+ "reward_description": "## Rewards\nYou start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each secret document collected (visiting a red door). Each time you get shot you lose one life and the game ends when losing all lives."
},
"enduro": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=163",
"env_description": "You are a racer in the National Enduro, a long-distance endurance race. You must overtake a certain amount of cars each day to stay on the race. The first day you need to pass 200 cars, and 300 foreach following day. The game ends if you do not meet your overtake quota for the day.",
- "reward_description": "### Rewards\n\nYou get 1 point for each vehicle you overtake.\n"
+ "reward_description": "## Rewards\nYou get 1 point for each vehicle you overtake."
},
"entombed": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=165",
@@ -177,7 +177,7 @@
"fishing_derby": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=182",
"env_description": "Your objective is to catch more sunfish than your opponent.",
- "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)."
+ "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182)."
},
"flag_capture": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=183",
@@ -187,7 +187,7 @@
"freeway": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=192",
"env_description": "Your objective is to guide your chicken across lane after lane of busy rush hour traffic. You receive a point for every chicken that makes it to the top of the screen after crossing all the lanes of traffic.",
- "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)."
+ "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192)."
},
"frogger": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=194",
@@ -197,7 +197,7 @@
"frostbite": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=199",
"env_description": "In Frostbite, the player controls \"Frostbite Bailey\" who hops back and forth across across an Arctic river, changing the color of the ice blocks from white to blue. Each time he does so, a block is added to his igloo.",
- "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)."
+ "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199)."
},
"galaxian": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=202",
@@ -207,12 +207,12 @@
"gopher": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=218",
"env_description": "The player controls a shovel-wielding farmer who protects a crop of three carrots from a gopher.",
- "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)."
+ "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218)."
},
"gravitar": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=223",
"env_description": "The player controls a small blue spacecraft. The game starts in a fictional solar system with several planets to explore. If the player moves his ship into a planet, he will be taken to a side-view landscape.",
- "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)."
+ "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223)."
},
"hangman": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=231",
@@ -227,7 +227,7 @@
"hero": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=228",
"env_description": "You need to rescue miners that are stuck in a mine shaft. You have access to various tools: A propeller backpack that allows you to fly wherever you want, sticks of dynamite that can be used to blast through walls, a laser beam to kill vermin, and a raft to float across stretches of lava.You have a limited amount of power. Once you run out, you lose a live.",
- "reward_description": "### Rewards\n\nYou score points for shooting critters, rescuing miners, and dynamiting walls.\nExtra points are rewarded for any power remaining after rescuing a miner.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).\n"
+ "reward_description": "## Rewards\nYou score points for shooting critters, rescuing miners, and dynamiting walls. Extra points are rewarded for any power remaining after rescuing a miner. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228)."
},
"human_cannonball": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=238",
@@ -236,18 +236,18 @@
},
"ice_hockey": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=241",
- "env_description": "Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called \"the puck\".There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal.Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.",
- "reward_description": "### Rewards\n\nYou score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner.\nThere are no limits to how many points you can get per game, other than the time limit of 3-minute games.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).\n"
+ "env_description": "Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called \"the puck\". There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal. Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.",
+ "reward_description": "## Rewards\nYou score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. There are no limits to how many points you can get per game, other than the time limit of 3-minute games. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241)."
},
"jamesbond": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=250",
- "env_description": "Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions.The craft moves forward with a right motion and slightly back with a left motion.An up or down motion causes the craft to jump or dive.You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.",
- "reward_description": "### Rewards\n\nThe game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score.\nThere will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250).\n"
+ "env_description": "Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions. The craft moves forward with a right motion and slightly back with a left motion. An up or down motion causes the craft to jump or dive. You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.",
+ "reward_description": "## Rewards\nThe game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250)."
},
"journey_escape": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=252",
- "env_description": "You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out.You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.",
- "reward_description": "### Rewards\n\nAt the start of the game, you will have $50,000 and 60 units of time.\nYour end game score with be dependent on how much time you have remaining and who you encounter along the way.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252).\n"
+ "env_description": "You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out. You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.",
+ "reward_description": "## Rewards\nAt the start of the game, you will have $50,000 and 60 units of time. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252)."
},
"kaboom": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=257",
@@ -256,8 +256,8 @@
},
"kangaroo": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=923",
- "env_description": "The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives.During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.",
- "reward_description": "### Rewards\n\nYour score will be shown at the top right corner of the game.\nYour end game score with be dependent on how much time you have remaining and who you encounter along the way.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).\n"
+ "env_description": "The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives. During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.",
+ "reward_description": "## Rewards\nYour score will be shown at the top right corner of the game. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923)."
},
"keystone_kapers": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=261",
@@ -281,8 +281,8 @@
},
"krull": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=267",
- "env_description": "Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast.The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.",
- "reward_description": "### Rewards\n\nYou will receive various scores for each monster you kill.\nYou can play the game until you have lost all your lives.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267).\n"
+ "env_description": "Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast. The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.",
+ "reward_description": "## Rewards\nYou will receive various scores for each monster you kill. You can play the game until you have lost all your lives. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267)."
},
"kung_fu_master": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=268",
@@ -291,7 +291,7 @@
},
"laser_gates": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=271",
- "env_description": "The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and deestroy the four Failsafe Detonators.",
+ "env_description": "The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and destroy the four Failsafe Detonators.",
"reward_description": ""
},
"lost_luggage": {
@@ -347,7 +347,7 @@
"pitfall": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=360",
"env_description": "You control Pitfall Harry and are tasked with collecting all the treasures in a jungle within 20 minutes. You have three lives. The game is over if you collect all the treasures or if you die or if the time runs out.",
- "reward_description": "### Rewards\n\nYou get score points for collecting treasure, you lose points through some misfortunes like falling down a hole.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360).\n"
+ "reward_description": "## Rewards\nYou get score points for collecting treasure, you lose points through some misfortunes like falling down a hole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360)."
},
"pitfall2": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=359",
@@ -357,42 +357,42 @@
"pong": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=587",
"env_description": "You control the right paddle, you compete against the left paddle controlled by the computer. You each try to keep deflecting the ball away from your goal and into your opponent's goal.",
- "reward_description": "### Rewards\n\nYou get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587).\n"
+ "reward_description": "## Rewards\nYou get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587)."
},
"pooyan": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=372",
"env_description": "You are a mother pig protecting her piglets (Pooyans) from wolves. In the first scene, you can move up and down a rope. Try to shoot the worker's balloons, while guarding yourself from attacks. If the wolves reach the ground safely they will get behind and try to eat you. In the second scene, the wolves try to float up. You have to try and stop them using arrows and bait. You die if a wolf eats you, or a stone or rock hits you.",
- "reward_description": "### Rewards\n\nIf you hit a balloon, wolf or stone with an arrow you score points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372).\n"
+ "reward_description": "## Rewards\nIf you hit a balloon, wolf or stone with an arrow you score points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372)."
},
"private_eye": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=376",
"env_description": "You control the French Private Eye Pierre Touche. Navigate the city streets, parks, secret passages, dead-ends and one-ways in search of the ringleader, Henri Le Fiend and his gang. You also need to find evidence and stolen goods that are scattered about. There are five cases, complete each case before its statute of limitations expires.",
- "reward_description": "### Rewards\n\nYou score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376).\n"
+ "reward_description": "## Rewards\nYou score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376)."
},
"qbert": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=1224",
"env_description": "You are Q*bert. Your goal is to change the color of all the cubes on the pyramid to the pyramid's 'destination' color. To do this, you must hop on each cube on the pyramid one at a time while avoiding nasty creatures that lurk there.",
- "reward_description": "### Rewards\n\nYou score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL).\n"
+ "reward_description": "## Rewards\nYou score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL)."
},
"riverraid": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=409",
- "env_description": "You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards).You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots).The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.",
- "reward_description": "### Rewards\n\nScore points are your only reward. You get score points each time you destroy an enemy object:\n\n| Enemy Object | Score Points |\n|--------------|--------------|\n| Tanker | 30 |\n| Helicopter | 60 |\n| Fuel Depot | 80 |\n| Jet | 100 |\n| Bridge | 500 |\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409).\n"
+ "env_description": "You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards). You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low. You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots). The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.",
+ "reward_description": "## Rewards\nScore points are your only reward. You get score points each time you destroy an enemy object:\n\n| Enemy Object | Score Points |\n|--------------|--------------|\n| Tanker | 30 |\n| Helicopter | 60 |\n| Fuel Depot | 80 |\n| Jet | 100 |\n| Bridge | 500 |\n\nFor a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409)."
},
"road_runner": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=412",
- "env_description": "You control the Road Runner(TM) in a race; you can control the direction to run in and times to jumps.The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert.The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff,or you get hit by a falling rock.You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.",
- "reward_description": "### Rewards\n\nScore points are your only reward. You get score points each time you:\n\n| actions | points |\n|-------------------------------------------------------|--------|\n| eat a pile of birdseed | 100 |\n| eat steel shot | 100 |\n| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |\n| get the coyote hit by a truck | 1000 |\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).\n"
+ "env_description": "You control the Road Runner(TM) in a race; you can control the direction to run in and when to jump. The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert. The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff, or you get hit by a falling rock. You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.",
+ "reward_description": "## Rewards\nScore points are your only reward. You get score points each time you:\n\n| actions | points |\n|-------------------------------------------------------|--------|\n| eat a pile of birdseed | 100 |\n| eat steel shot | 100 |\n| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |\n| get the coyote hit by a truck | 1000 |\n\nFor a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412)."
},
"robotank": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=416",
- "env_description": "You control your Robot Tanks to destroy enemies and avoid enemy fire.Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed.The game begins with one active Robot Tank and three reserves.Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares).You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.",
- "reward_description": "### Rewards\n\nThe number of enemies destroyed is the only reward.\n\nA small tank appears at the top of your screen for each enemy\n you destroy. A square with the number 12 appears each time a squadron of twelve enemies are\n destroyed.\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416).\n"
+ "env_description": "You control your Robot Tanks to destroy enemies and avoid enemy fire. Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed. The game begins with one active Robot Tank and three reserves. Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares). You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.",
+ "reward_description": "## Rewards\nThe number of enemies destroyed is the only reward. A small tank appears at the top of your screen for each enemy you destroy. A square with the number 12 appears each time a squadron of twelve enemies are destroyed. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416)."
},
"seaquest": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=424",
- "env_description": "You control a sub able to move in all directions and fire torpedoes.The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly.The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to yourbase. You can only have six reserve subs on the screen at one time.Your sub will explode if it collides with anything except your own divers.The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well.",
- "reward_description": "### Rewards\n\nScore points are your only reward.\n\nBlasting enemy sub and killer shark is worth\n20 points. Every time you surface with six divers, the value of enemy subs\nand killer sharks increases by 10, up to a maximum of 90 points each.\n\nRescued divers start at 50 points each. Then, their point value increases by 50, every\ntime you surface, up to a maximum of 1000 points each.\n\nYou'll be further rewarded with bonus points for all the oxygen you have remaining the\nmoment you surface. The more oxygen you have left, the more bonus points\nyou're given.\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424).\n"
+ "env_description": "You control a sub able to move in all directions and fire torpedoes. The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly. The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to your base. You can only have six reserve subs on the screen at one time. Your sub will explode if it collides with anything except your own divers. The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well.",
+ "reward_description": "## Rewards\nScore points are your only reward. Blasting enemy sub and killer shark is worth 20 points. Every time you surface with six divers, the value of enemy subs and killer sharks increases by 10, up to a maximum of 90 points each. Rescued divers start at 50 points each. Then, their point value increases by 50, every time you surface, up to a maximum of 1000 points each. You'll be further rewarded with bonus points for all the oxygen you have remaining the moment you surface. The more oxygen you have left, the more bonus points you're given. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424)."
},
"sir_lancelot": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=431",
@@ -401,18 +401,18 @@
},
"skiing": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=434",
- "env_description": "You control a skier who can move sideways.The goal is to run through all gates (between the poles) in the fastest time.You are penalized five seconds for each gate you miss.If you hit a gate or a tree, your skier will jump back up and keep going.",
- "reward_description": "### Rewards\n\nSeconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds.\n\nFor a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434).\n"
+ "env_description": "You control a skier who can move sideways. The goal is to run through all gates (between the poles) in the fastest time. You are penalized five seconds for each gate you miss. If you hit a gate or a tree, your skier will jump back up and keep going.",
+ "reward_description": "## Rewards\nSeconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434), in particular the Slalom Racing section."
},
"solaris": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=450",
"env_description": "You control a spaceship. Blast enemies before they can blast you. You can warp to different sectors. You have to defend Federation planets, and destroy Zylon forces. Keep track of your fuel, if you run out you lose a life. Warp to a Federation planet to refuel. The game ends if all your ships are destroyed or if you reach the Solaris planet.",
- "reward_description": "### Rewards\n\nYou gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).\n"
+ "reward_description": "## Rewards\nYou gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450)."
},
"space_invaders": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=460",
"env_description": "Your objective is to destroy the space invaders by shooting your laser cannon at them before they reach the Earth. The game ends when all your lives are lost after taking enemy fire, or when they reach the earth.",
- "reward_description": "### Rewards\n\nYou gain points for destroying space invaders. The invaders in the back rows are worth more points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460).\n"
+ "reward_description": "## Rewards\nYou gain points for destroying space invaders. The invaders in the back rows are worth more points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460)."
},
"space_war": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=470",
@@ -422,7 +422,7 @@
"star_gunner": {
"atariage_url": "http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html",
"env_description": "Stop the alien invasion by shooting down alien saucers and creatures while avoiding bombs.",
- "reward_description": "### Rewards\n\nYou score points for destroying enemies. You get bonus points for clearing a wave and a level.\nFor a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html).\n"
+ "reward_description": "## Rewards\nYou score points for destroying enemies. You get bonus points for clearing a wave and a level. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html)."
},
"superman": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=533",
@@ -436,8 +436,8 @@
},
"tennis": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=555",
- "env_description": "You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis.The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.",
- "reward_description": "### Rewards\n\nThe scoring is as per the sport of tennis, played till one set.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555).\n"
+ "env_description": "You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis. The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.",
+ "reward_description": "## Rewards\nThe scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555)."
},
"tetris": {
"atariage_url": "",
@@ -446,13 +446,13 @@
},
"tic_tac_toe_3d": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=521",
- "env_description": "Players take turns placing their mark (an X or an O) on a 3-dimensional, 4 x 4 x 4 grid in an attempt to get 4 in a row before their opponent does.",
+ "env_description": "Players take turns placing their mark (an X or an O) on a 3-dimensional, 4x4x4 grid in an attempt to get 4 in a row before their opponent does.",
"reward_description": ""
},
"time_pilot": {
"atariage_url": "http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html",
"env_description": "You control an aircraft. Use it to destroy your enemies. As you progress in the game, you encounter enemies with technology that is increasingly from the future.",
- "reward_description": "### Rewards\n\nYou score points for destroying enemies, gaining more points for difficult enemies.\nFor a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html).\n"
+ "reward_description": "## Rewards\nYou score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html)."
},
"trondead": {
"atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=569",
diff --git a/docs/_scripts/gen_atari_table.py b/docs/_scripts/gen_atari_table.py
index e7ff9d6ea..1cd6f2841 100644
--- a/docs/_scripts/gen_atari_table.py
+++ b/docs/_scripts/gen_atari_table.py
@@ -9,6 +9,11 @@
import gymnasium
+# Necessary for v1.0.0 without ale-py gymnasium support
+# from shimmy import registration
+# registration._register_atari_envs()
+
+
# # Generate the list of all atari games on atari.md
for rom_id in sorted(ALL_ATARI_GAMES):
print(f"atari/{rom_id}")
@@ -51,7 +56,7 @@ def shortened_repr(values):
for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = rom_utils.rom_id_to_name(rom_id)
- env = gymnasium.make(f"ALE/{env_name}-v5")
+ env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped
available_difficulties = env.ale.getAvailableDifficulties()
default_difficulty = env.ale.cloneState().getDifficulty()
@@ -83,7 +88,7 @@ def shortened_repr(values):
for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = rom_utils.rom_id_to_name(rom_id)
- env = gymnasium.make(f"ALE/{env_name}-v5")
+ env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped
if rom_id in atari_data:
env_data = atari_data[rom_id]
@@ -211,6 +216,7 @@ def shortened_repr(values):
See variants section for the type of observation used by each environment id by default.
{reward_description}
+
## Variants
{env_name} has the following variants of the environment id which have the following differences in observation,
@@ -230,7 +236,7 @@ def shortened_repr(values):
A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments.
-* v5: Stickiness was added back and stochastic frameskipping was removed. The environments are now in the "ALE" namespace.
+* v5: Stickiness was added back and stochastic frame-skipping was removed. The environments are now in the "ALE" namespace.
* v4: Stickiness of actions was removed
* v0: Initial versions release
"""
diff --git a/docs/_scripts/gen_gifs.py b/docs/_scripts/gen_gifs.py
index 0efac644e..ec9aee361 100644
--- a/docs/_scripts/gen_gifs.py
+++ b/docs/_scripts/gen_gifs.py
@@ -17,9 +17,6 @@
"FrozenLake8x8",
"LunarLanderContinuous",
"BipedalWalkerHardcore",
- "CartPoleJax",
- "PendulumJax",
- "Jax-Blackjack",
]
for env_spec in gym.registry.values():
if env_spec.name in exclude_env_names:
diff --git a/docs/_static/videos/box2d/bipedal_walker.gif b/docs/_static/videos/box2d/bipedal_walker.gif
index a8f8192c2..418d6f958 100644
Binary files a/docs/_static/videos/box2d/bipedal_walker.gif and b/docs/_static/videos/box2d/bipedal_walker.gif differ
diff --git a/docs/_static/videos/box2d/car_racing.gif b/docs/_static/videos/box2d/car_racing.gif
index 4f12c736c..88e67d795 100644
Binary files a/docs/_static/videos/box2d/car_racing.gif and b/docs/_static/videos/box2d/car_racing.gif differ
diff --git a/docs/_static/videos/box2d/lunar_lander.gif b/docs/_static/videos/box2d/lunar_lander.gif
index 700acf65a..94a9060ba 100644
Binary files a/docs/_static/videos/box2d/lunar_lander.gif and b/docs/_static/videos/box2d/lunar_lander.gif differ
diff --git a/docs/_static/videos/classic_control/acrobot.gif b/docs/_static/videos/classic_control/acrobot.gif
index 3f367f4ea..6241b7b54 100644
Binary files a/docs/_static/videos/classic_control/acrobot.gif and b/docs/_static/videos/classic_control/acrobot.gif differ
diff --git a/docs/_static/videos/classic_control/cart_pole.gif b/docs/_static/videos/classic_control/cart_pole.gif
index 96365f6a7..10d495dfb 100644
Binary files a/docs/_static/videos/classic_control/cart_pole.gif and b/docs/_static/videos/classic_control/cart_pole.gif differ
diff --git a/docs/_static/videos/classic_control/mountain_car.gif b/docs/_static/videos/classic_control/mountain_car.gif
index 5eeb6e8af..33dbe9838 100644
Binary files a/docs/_static/videos/classic_control/mountain_car.gif and b/docs/_static/videos/classic_control/mountain_car.gif differ
diff --git a/docs/_static/videos/classic_control/mountain_car_continuous.gif b/docs/_static/videos/classic_control/mountain_car_continuous.gif
index 846d8f460..91b564402 100644
Binary files a/docs/_static/videos/classic_control/mountain_car_continuous.gif and b/docs/_static/videos/classic_control/mountain_car_continuous.gif differ
diff --git a/docs/_static/videos/classic_control/pendulum.gif b/docs/_static/videos/classic_control/pendulum.gif
index c6f22d183..09c4eae0d 100644
Binary files a/docs/_static/videos/classic_control/pendulum.gif and b/docs/_static/videos/classic_control/pendulum.gif differ
diff --git a/docs/_static/videos/mujoco/ant.gif b/docs/_static/videos/mujoco/ant.gif
index 24d5eacea..d9892785a 100644
Binary files a/docs/_static/videos/mujoco/ant.gif and b/docs/_static/videos/mujoco/ant.gif differ
diff --git a/docs/_static/videos/mujoco/half_cheetah.gif b/docs/_static/videos/mujoco/half_cheetah.gif
index a1fcdf314..2db6ca568 100644
Binary files a/docs/_static/videos/mujoco/half_cheetah.gif and b/docs/_static/videos/mujoco/half_cheetah.gif differ
diff --git a/docs/_static/videos/mujoco/hopper.gif b/docs/_static/videos/mujoco/hopper.gif
index 181205ac0..9b50e7d0b 100644
Binary files a/docs/_static/videos/mujoco/hopper.gif and b/docs/_static/videos/mujoco/hopper.gif differ
diff --git a/docs/_static/videos/mujoco/humanoid.gif b/docs/_static/videos/mujoco/humanoid.gif
index 949de7319..93e5fa951 100644
Binary files a/docs/_static/videos/mujoco/humanoid.gif and b/docs/_static/videos/mujoco/humanoid.gif differ
diff --git a/docs/_static/videos/mujoco/humanoid_standup.gif b/docs/_static/videos/mujoco/humanoid_standup.gif
index bb118ad07..40ac612f6 100644
Binary files a/docs/_static/videos/mujoco/humanoid_standup.gif and b/docs/_static/videos/mujoco/humanoid_standup.gif differ
diff --git a/docs/_static/videos/mujoco/inverted_double_pendulum.gif b/docs/_static/videos/mujoco/inverted_double_pendulum.gif
index 5fbc35153..82a07f2e6 100644
Binary files a/docs/_static/videos/mujoco/inverted_double_pendulum.gif and b/docs/_static/videos/mujoco/inverted_double_pendulum.gif differ
diff --git a/docs/_static/videos/mujoco/inverted_pendulum.gif b/docs/_static/videos/mujoco/inverted_pendulum.gif
index 62d9abb18..b5792b5fe 100644
Binary files a/docs/_static/videos/mujoco/inverted_pendulum.gif and b/docs/_static/videos/mujoco/inverted_pendulum.gif differ
diff --git a/docs/_static/videos/mujoco/pusher.gif b/docs/_static/videos/mujoco/pusher.gif
index a1dca8dcb..c9058cb30 100644
Binary files a/docs/_static/videos/mujoco/pusher.gif and b/docs/_static/videos/mujoco/pusher.gif differ
diff --git a/docs/_static/videos/mujoco/reacher.gif b/docs/_static/videos/mujoco/reacher.gif
index d6d5fa4fa..eaed0127f 100644
Binary files a/docs/_static/videos/mujoco/reacher.gif and b/docs/_static/videos/mujoco/reacher.gif differ
diff --git a/docs/_static/videos/mujoco/swimmer.gif b/docs/_static/videos/mujoco/swimmer.gif
index db679e167..27a6089bb 100644
Binary files a/docs/_static/videos/mujoco/swimmer.gif and b/docs/_static/videos/mujoco/swimmer.gif differ
diff --git a/docs/_static/videos/mujoco/walker2d.gif b/docs/_static/videos/mujoco/walker2d.gif
index d6d256c36..81137f178 100644
Binary files a/docs/_static/videos/mujoco/walker2d.gif and b/docs/_static/videos/mujoco/walker2d.gif differ
diff --git a/docs/_static/videos/toy_text/blackjack.gif b/docs/_static/videos/toy_text/blackjack.gif
index c88b5fb07..5c5ab2058 100644
Binary files a/docs/_static/videos/toy_text/blackjack.gif and b/docs/_static/videos/toy_text/blackjack.gif differ
diff --git a/docs/_static/videos/toy_text/cliff_walking.gif b/docs/_static/videos/toy_text/cliff_walking.gif
index 41978369d..7aef17a2d 100644
Binary files a/docs/_static/videos/toy_text/cliff_walking.gif and b/docs/_static/videos/toy_text/cliff_walking.gif differ
diff --git a/docs/_static/videos/toy_text/frozen_lake.gif b/docs/_static/videos/toy_text/frozen_lake.gif
index 9af9274de..302b41476 100644
Binary files a/docs/_static/videos/toy_text/frozen_lake.gif and b/docs/_static/videos/toy_text/frozen_lake.gif differ
diff --git a/docs/_static/videos/toy_text/taxi.gif b/docs/_static/videos/toy_text/taxi.gif
index aecde4a2a..077094c98 100644
Binary files a/docs/_static/videos/toy_text/taxi.gif and b/docs/_static/videos/toy_text/taxi.gif differ
diff --git a/docs/api/env.md b/docs/api/env.md
index af3345b7e..fd5125a3b 100644
--- a/docs/api/env.md
+++ b/docs/api/env.md
@@ -54,6 +54,7 @@ title: Env
.. autoproperty:: gymnasium.Env.unwrapped
.. autoproperty:: gymnasium.Env.np_random
+.. autoproperty:: gymnasium.Env.np_random_seed
```
## Implementing environments
@@ -61,7 +62,7 @@ title: Env
```{eval-rst}
.. py:currentmodule:: gymnasium
-When implementing an environment, the :meth:`Env.reset` and :meth:`Env.step` functions much be created describing the dynamics of the environment. For more information see the environment creation tutorial.
+When implementing an environment, the :meth:`Env.reset` and :meth:`Env.step` functions must be created to describe the dynamics of the environment. For more information, see the environment creation tutorial.
```
## Creating environments
diff --git a/docs/api/spaces.md b/docs/api/spaces.md
index 791cf43ac..9f749a66a 100644
--- a/docs/api/spaces.md
+++ b/docs/api/spaces.md
@@ -9,7 +9,6 @@ title: Spaces
spaces/fundamental
spaces/composite
spaces/utils
-vector/utils
```
```{eval-rst}
diff --git a/docs/api/vector.md b/docs/api/vector.md
index 0d1887397..c1e3e7b86 100644
--- a/docs/api/vector.md
+++ b/docs/api/vector.md
@@ -67,6 +67,7 @@ vector/utils
```{eval-rst}
.. autoproperty:: gymnasium.vector.VectorEnv.unwrapped
.. autoproperty:: gymnasium.vector.VectorEnv.np_random
+.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
```
## Making Vector Environments
diff --git a/docs/api/vector/async_vector_env.md b/docs/api/vector/async_vector_env.md
index a0368419e..14fdf5c50 100644
--- a/docs/api/vector/async_vector_env.md
+++ b/docs/api/vector/async_vector_env.md
@@ -11,3 +11,10 @@
.. automethod:: gymnasium.vector.AsyncVectorEnv.get_attr
.. automethod:: gymnasium.vector.AsyncVectorEnv.set_attr
```
+
+### Additional Methods
+
+```{eval-rst}
+.. autoproperty:: gymnasium.vector.VectorEnv.np_random
+.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
+```
diff --git a/docs/api/vector/sync_vector_env.md b/docs/api/vector/sync_vector_env.md
index 3855e4820..1295f59f9 100644
--- a/docs/api/vector/sync_vector_env.md
+++ b/docs/api/vector/sync_vector_env.md
@@ -11,3 +11,10 @@
.. automethod:: gymnasium.vector.SyncVectorEnv.get_attr
.. automethod:: gymnasium.vector.SyncVectorEnv.set_attr
```
+
+### Additional Methods
+
+```{eval-rst}
+.. autoproperty:: gymnasium.vector.VectorEnv.np_random
+.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
+```
diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md
index 000faf061..9a3d1e50b 100644
--- a/docs/api/wrappers.md
+++ b/docs/api/wrappers.md
@@ -12,7 +12,6 @@ wrappers/misc_wrappers
wrappers/action_wrappers
wrappers/observation_wrappers
wrappers/reward_wrappers
-wrappers/vector_wrappers
```
```{eval-rst}
@@ -48,5 +47,6 @@ wrappers/vector_wrappers
.. autoproperty:: gymnasium.Wrapper.spec
.. autoproperty:: gymnasium.Wrapper.metadata
.. autoproperty:: gymnasium.Wrapper.np_random
+.. autoproperty:: gymnasium.Wrapper.np_random_seed
.. autoproperty:: gymnasium.Wrapper.unwrapped
```
diff --git a/docs/api/wrappers/observation_wrappers.md b/docs/api/wrappers/observation_wrappers.md
index 10284aca0..631d53203 100644
--- a/docs/api/wrappers/observation_wrappers.md
+++ b/docs/api/wrappers/observation_wrappers.md
@@ -18,7 +18,7 @@
.. autoclass:: gymnasium.wrappers.GrayscaleObservation
.. autoclass:: gymnasium.wrappers.MaxAndSkipObservation
.. autoclass:: gymnasium.wrappers.NormalizeObservation
-.. autoclass:: gymnasium.wrappers.RenderObservation
+.. autoclass:: gymnasium.wrappers.AddRenderObservation
.. autoclass:: gymnasium.wrappers.ResizeObservation
.. autoclass:: gymnasium.wrappers.ReshapeObservation
.. autoclass:: gymnasium.wrappers.RescaleObservation
diff --git a/docs/api/wrappers/table.md b/docs/api/wrappers/table.md
index 240b9f1f9..be540339f 100644
--- a/docs/api/wrappers/table.md
+++ b/docs/api/wrappers/table.md
@@ -56,7 +56,7 @@ wrapper in the page on the wrapper type
- Records videos of environment episodes using the environment's render function.
* - :class:`RenderCollection`
- Collect rendered frames of an environment such ``render`` returns a ``list[RenderedFrame]``.
- * - :class:`RenderObservation`
+ * - :class:`AddRenderObservation`
- Includes the rendered observations in the environment's observations.
* - :class:`RescaleAction`
- Affinely (linearly) rescales a ``Box`` action space of the environment to within the range of ``[min_action, max_action]``.
diff --git a/docs/environments/atari/adventure.md b/docs/environments/atari/adventure.md
index a06d0fcc4..b4326a0fe 100644
--- a/docs/environments/atari/adventure.md
+++ b/docs/environments/atari/adventure.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Adventure has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/air_raid.md b/docs/environments/atari/air_raid.md
index 0cb79cfd5..498c6b962 100644
--- a/docs/environments/atari/air_raid.md
+++ b/docs/environments/atari/air_raid.md
@@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
AirRaid has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/alien.md b/docs/environments/atari/alien.md
index 81c57b90d..b672dfe29 100644
--- a/docs/environments/atari/alien.md
+++ b/docs/environments/atari/alien.md
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught
-by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a
-table of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).
+## Rewards
+You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a table of scores corresponding to the different achievements, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).
## Variants
diff --git a/docs/environments/atari/amidar.md b/docs/environments/atari/amidar.md
index 3ef9357ff..a6624aaae 100644
--- a/docs/environments/atari/amidar.md
+++ b/docs/environments/atari/amidar.md
@@ -48,10 +48,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).
+## Rewards
+You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).
## Variants
diff --git a/docs/environments/atari/assault.md b/docs/environments/atari/assault.md
index b57e658ac..810c22bf5 100644
--- a/docs/environments/atari/assault.md
+++ b/docs/environments/atari/assault.md
@@ -21,7 +21,7 @@ For more Assault variants with different observation and action spaces, see the
## Description
-You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones.You must destroy these enemies and dodge their attacks.
+You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones. You must destroy these enemies and dodge their attacks.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=827)
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Assault has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/asterix.md b/docs/environments/atari/asterix.md
index 21599d014..66232ae91 100644
--- a/docs/environments/atari/asterix.md
+++ b/docs/environments/atari/asterix.md
@@ -47,9 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-A table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).
+## Rewards
+A table of scores awarded for collecting the different objects is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).
## Variants
diff --git a/docs/environments/atari/asteroids.md b/docs/environments/atari/asteroids.md
index a94cbd8bf..a1be05d51 100644
--- a/docs/environments/atari/asteroids.md
+++ b/docs/environments/atari/asteroids.md
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score
-for destroying it.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL).
+## Rewards
+You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score for destroying it. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL).
## Variants
diff --git a/docs/environments/atari/atlantis.md b/docs/environments/atari/atlantis.md
index 4348bd59b..e9e19f952 100644
--- a/docs/environments/atari/atlantis.md
+++ b/docs/environments/atari/atlantis.md
@@ -21,7 +21,7 @@ For more Atlantis variants with different observation and action spaces, see the
## Description
-Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts.You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought of a wave of enemies and scored a sufficient number of points.
+Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts. You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought off a wave of enemies and scored a sufficient number of points.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835)
@@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying enemies, keeping installations protected during attack waves. You score more points
-if you manage to destroy your enemies with one of the outer defense posts.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).
+## Rewards
+You score points for destroying enemies, keeping installations protected during attack waves. You score more points if you manage to destroy your enemies with one of the outer defense posts. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).
## Variants
diff --git a/docs/environments/atari/atlantis2.md b/docs/environments/atari/atlantis2.md
index 30b09f7ef..ad32dafd7 100644
--- a/docs/environments/atari/atlantis2.md
+++ b/docs/environments/atari/atlantis2.md
@@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Atlantis2 has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/backgammon.md b/docs/environments/atari/backgammon.md
index 5d00fa245..24f2235e2 100644
--- a/docs/environments/atari/backgammon.md
+++ b/docs/environments/atari/backgammon.md
@@ -46,6 +46,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Backgammon has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/bank_heist.md b/docs/environments/atari/bank_heist.md
index 7c199f964..a07da8ea6 100644
--- a/docs/environments/atari/bank_heist.md
+++ b/docs/environments/atari/bank_heist.md
@@ -21,7 +21,7 @@ For more BankHeist variants with different observation and action spaces, see th
## Description
-You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city.At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police,or run over the dynamite you have previously dropped.
+You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chase you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city. At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police, or run over the dynamite you have previously dropped.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city,
-you will score extra points.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).
+## Rewards
+You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, you will score extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).
## Variants
diff --git a/docs/environments/atari/basic_math.md b/docs/environments/atari/basic_math.md
index e9a454b1c..acefc3860 100644
--- a/docs/environments/atari/basic_math.md
+++ b/docs/environments/atari/basic_math.md
@@ -21,8 +21,7 @@ For more BasicMath variants with different observation and action spaces, see th
## Description
-You must solve basic math problems using a joystick
-to scroll to the correct numeric answer.
+You must solve basic math problems using a joystick to scroll to the correct numeric answer.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=14)
@@ -48,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
BasicMath has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/battle_zone.md b/docs/environments/atari/battle_zone.md
index 0b096805f..5e6634f50 100644
--- a/docs/environments/atari/battle_zone.md
+++ b/docs/environments/atari/battle_zone.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You receive points for destroying enemies.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL).
+## Rewards
+You receive points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL).
## Variants
diff --git a/docs/environments/atari/beam_rider.md b/docs/environments/atari/beam_rider.md
index 6d4a44654..a920b34fc 100644
--- a/docs/environments/atari/beam_rider.md
+++ b/docs/environments/atari/beam_rider.md
@@ -47,10 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying enemies.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL).
+## Rewards
+You score points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL).
## Variants
diff --git a/docs/environments/atari/berzerk.md b/docs/environments/atari/berzerk.md
index 053c70f13..afdc8e368 100644
--- a/docs/environments/atari/berzerk.md
+++ b/docs/environments/atari/berzerk.md
@@ -21,7 +21,7 @@ For more Berzerk variants with different observation and action spaces, see the
## Description
-You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode.You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.
+You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode. You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=866)
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying robots.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL).
+## Rewards
+You score points for destroying robots. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL).
## Variants
diff --git a/docs/environments/atari/blackjack.md b/docs/environments/atari/blackjack.md
index 70dff74dc..e4a59b3b3 100644
--- a/docs/environments/atari/blackjack.md
+++ b/docs/environments/atari/blackjack.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Blackjack has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/bowling.md b/docs/environments/atari/bowling.md
index 32982c8b5..ccdaaee56 100644
--- a/docs/environments/atari/bowling.md
+++ b/docs/environments/atari/bowling.md
@@ -46,12 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You receive points for knocking down pins. The exact score depends on whether you manage a "strike", "spare" or "open"
-frame. Moreover, the points you score for one frame may depend on following frames.
-You can score up to 300 points in one game (if you manage to do 12 strikes).
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879).
+## Rewards
+You receive points for knocking down pins. The exact score depends on whether you manage a "strike", "spare" or "open" frame. Moreover, the points you score for one frame may depend on following frames. You can score up to 300 points in one game (if you manage to do 12 strikes). For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879).
## Variants
diff --git a/docs/environments/atari/boxing.md b/docs/environments/atari/boxing.md
index f0d2620bc..e6533a187 100644
--- a/docs/environments/atari/boxing.md
+++ b/docs/environments/atari/boxing.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by landing punches.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).
+## Rewards
+You score points by landing punches. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).
## Variants
diff --git a/docs/environments/atari/breakout.md b/docs/environments/atari/breakout.md
index 42b0ba1e5..7fc8a9585 100644
--- a/docs/environments/atari/breakout.md
+++ b/docs/environments/atari/breakout.md
@@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).
+## Rewards
+You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).
## Variants
diff --git a/docs/environments/atari/carnival.md b/docs/environments/atari/carnival.md
index eb9112874..c897d6611 100644
--- a/docs/environments/atari/carnival.md
+++ b/docs/environments/atari/carnival.md
@@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign.
-You will score extra points if it shows a plus sign!
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).
+## Rewards
+You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. You will score extra points if it shows a plus sign! For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).
## Variants
diff --git a/docs/environments/atari/casino.md b/docs/environments/atari/casino.md
index 8460c0ffc..a88002de8 100644
--- a/docs/environments/atari/casino.md
+++ b/docs/environments/atari/casino.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Casino has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/centipede.md b/docs/environments/atari/centipede.md
index 6e30d39f5..5d9e09fe8 100644
--- a/docs/environments/atari/centipede.md
+++ b/docs/environments/atari/centipede.md
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round
-(i.e. after you have lost a wand) for mushrooms that were not destroyed.
-Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).
+## Rewards
+You score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round (i.e. after you have lost a wand) for mushrooms that were not destroyed. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).
## Variants
diff --git a/docs/environments/atari/chopper_command.md b/docs/environments/atari/chopper_command.md
index 1a747bd75..a51883698 100644
--- a/docs/environments/atari/chopper_command.md
+++ b/docs/environments/atari/chopper_command.md
@@ -21,7 +21,7 @@ For more ChopperCommand variants with different observation and action spaces, s
## Description
-You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft.A mini-map is displayed at the bottom of the screen.
+You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft. A mini-map is displayed at the bottom of the screen.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number
-of trucks that have survived.
-Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).
+## Rewards
+You score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number of trucks that have survived. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).
## Variants
diff --git a/docs/environments/atari/crazy_climber.md b/docs/environments/atari/crazy_climber.md
index 15469c4f6..9d6c0f2c3 100644
--- a/docs/environments/atari/crazy_climber.md
+++ b/docs/environments/atari/crazy_climber.md
@@ -47,9 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-A table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).
+## Rewards
+A table of scores awarded for completing each row of a building is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).
## Variants
diff --git a/docs/environments/atari/crossbow.md b/docs/environments/atari/crossbow.md
index bfd28a6c0..9aece246d 100644
--- a/docs/environments/atari/crossbow.md
+++ b/docs/environments/atari/crossbow.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Crossbow has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/darkchambers.md b/docs/environments/atari/darkchambers.md
index 0912da58d..4fc9d1049 100644
--- a/docs/environments/atari/darkchambers.md
+++ b/docs/environments/atari/darkchambers.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Darkchambers has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/defender.md b/docs/environments/atari/defender.md
index 2478a76d8..10acc2d17 100644
--- a/docs/environments/atari/defender.md
+++ b/docs/environments/atari/defender.md
@@ -21,7 +21,7 @@ For more Defender variants with different observation and action spaces, see the
## Description
-Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids.You have three lives and three smart bombs. You lose a live when you are shot down by an alien spaceship.Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.
+Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids. You have three lives and three smart bombs. You lose a life when you are shot down by an alien spaceship. Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128)
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You receive points for destroying enemies, rescuing abducted humans and keeping humans alive.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128).
+## Rewards
+You receive points for destroying enemies, rescuing abducted humans and keeping humans alive. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128).
## Variants
diff --git a/docs/environments/atari/demon_attack.md b/docs/environments/atari/demon_attack.md
index 81a72a753..24e833b28 100644
--- a/docs/environments/atari/demon_attack.md
+++ b/docs/environments/atari/demon_attack.md
@@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-Each enemy you slay gives you points. The amount of points depends on the type of demon and which
-wave you are in. A detailed table of scores is provided on [the AtariAge
-page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).
+## Rewards
+Each enemy you slay gives you points. The amount of points depends on the type of demon and which wave you are in. A detailed table of scores is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).
## Variants
diff --git a/docs/environments/atari/donkey_kong.md b/docs/environments/atari/donkey_kong.md
index 7af67c0ea..64e5bf599 100644
--- a/docs/environments/atari/donkey_kong.md
+++ b/docs/environments/atari/donkey_kong.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
DonkeyKong has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/double_dunk.md b/docs/environments/atari/double_dunk.md
index 39f419a3e..6cbfbce57 100644
--- a/docs/environments/atari/double_dunk.md
+++ b/docs/environments/atari/double_dunk.md
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-Scores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending
-from where you shoot. After a defensive foul, a successful shot from the foul line gives you 1
-point.
+## Rewards
+Scores follow the rules of basketball. You can get either 3 points or 2 points, depending on where you shoot from. After a defensive foul, a successful shot from the foul line gives you 1 point.
## Variants
diff --git a/docs/environments/atari/earthworld.md b/docs/environments/atari/earthworld.md
index 04143cf2c..c0ef39d2a 100644
--- a/docs/environments/atari/earthworld.md
+++ b/docs/environments/atari/earthworld.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Earthworld has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/elevator_action.md b/docs/environments/atari/elevator_action.md
index cf8ef1d15..e700fe099 100644
--- a/docs/environments/atari/elevator_action.md
+++ b/docs/environments/atari/elevator_action.md
@@ -21,7 +21,7 @@ For more ElevatorAction variants with different observation and action spaces, s
## Description
-You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents.This is an unreleased prototype based on the arcade game.
+You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents. This is an unreleased prototype based on the arcade game.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1131)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each
-secret document collected (visiting a red door). Each time you get shot you lose one life and the
-game ends when losing all lives.
+## Rewards
+You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each secret document collected (visiting a red door). Each time you get shot you lose one life and the game ends when losing all lives.
## Variants
diff --git a/docs/environments/atari/enduro.md b/docs/environments/atari/enduro.md
index bf8b7eeb3..a1865cf40 100644
--- a/docs/environments/atari/enduro.md
+++ b/docs/environments/atari/enduro.md
@@ -47,8 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
+## Rewards
You get 1 point for each vehicle you overtake.
## Variants
diff --git a/docs/environments/atari/entombed.md b/docs/environments/atari/entombed.md
index 7e2583ef9..fcd4c7e46 100644
--- a/docs/environments/atari/entombed.md
+++ b/docs/environments/atari/entombed.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Entombed has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/et.md b/docs/environments/atari/et.md
index a5d6b7ca6..1e791649d 100644
--- a/docs/environments/atari/et.md
+++ b/docs/environments/atari/et.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Et has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/fishing_derby.md b/docs/environments/atari/fishing_derby.md
index 9997f1a89..27d360228 100644
--- a/docs/environments/atari/fishing_derby.md
+++ b/docs/environments/atari/fishing_derby.md
@@ -49,12 +49,10 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
+## Rewards
The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182).
-Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants
FishingDerby has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/flag_capture.md b/docs/environments/atari/flag_capture.md
index f8d6079cb..79e95187e 100644
--- a/docs/environments/atari/flag_capture.md
+++ b/docs/environments/atari/flag_capture.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
FlagCapture has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/freeway.md b/docs/environments/atari/freeway.md
index 7b7de6659..1a0508877 100644
--- a/docs/environments/atari/freeway.md
+++ b/docs/environments/atari/freeway.md
@@ -45,12 +45,10 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
+## Rewards
The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192).
-Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants
Freeway has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/frogger.md b/docs/environments/atari/frogger.md
index e503fd30b..2cc25da9c 100644
--- a/docs/environments/atari/frogger.md
+++ b/docs/environments/atari/frogger.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Frogger has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/frostbite.md b/docs/environments/atari/frostbite.md
index 25277493b..c94230adb 100644
--- a/docs/environments/atari/frostbite.md
+++ b/docs/environments/atari/frostbite.md
@@ -49,12 +49,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
+## Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).
-The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
-find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).
-
-Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants
Frostbite has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/galaxian.md b/docs/environments/atari/galaxian.md
index 03a34880f..b6f2af409 100644
--- a/docs/environments/atari/galaxian.md
+++ b/docs/environments/atari/galaxian.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Galaxian has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/gopher.md b/docs/environments/atari/gopher.md
index bb0aaae4d..522693175 100644
--- a/docs/environments/atari/gopher.md
+++ b/docs/environments/atari/gopher.md
@@ -47,12 +47,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
+## Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).
-The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
-find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).
-
-Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants
Gopher has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/gravitar.md b/docs/environments/atari/gravitar.md
index af046496f..d29a23a4e 100644
--- a/docs/environments/atari/gravitar.md
+++ b/docs/environments/atari/gravitar.md
@@ -49,12 +49,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
+## Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).
-The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
-find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).
-
-Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants
Gravitar has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/hangman.md b/docs/environments/atari/hangman.md
index 7955bad98..2d7cfdd45 100644
--- a/docs/environments/atari/hangman.md
+++ b/docs/environments/atari/hangman.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Hangman has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/haunted_house.md b/docs/environments/atari/haunted_house.md
index d1c766d97..1ff8c84d3 100644
--- a/docs/environments/atari/haunted_house.md
+++ b/docs/environments/atari/haunted_house.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
HauntedHouse has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/hero.md b/docs/environments/atari/hero.md
index 39c84aad3..04205d156 100644
--- a/docs/environments/atari/hero.md
+++ b/docs/environments/atari/hero.md
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for shooting critters, rescuing miners, and dynamiting walls.
-Extra points are rewarded for any power remaining after rescuing a miner.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).
+## Rewards
+You score points for shooting critters, rescuing miners, and dynamiting walls. Extra points are awarded for any power remaining after rescuing a miner. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).
## Variants
diff --git a/docs/environments/atari/human_cannonball.md b/docs/environments/atari/human_cannonball.md
index affd94b18..c607108c5 100644
--- a/docs/environments/atari/human_cannonball.md
+++ b/docs/environments/atari/human_cannonball.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
HumanCannonball has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/ice_hockey.md b/docs/environments/atari/ice_hockey.md
index a77a68788..6206560d8 100644
--- a/docs/environments/atari/ice_hockey.md
+++ b/docs/environments/atari/ice_hockey.md
@@ -21,7 +21,7 @@ For more IceHockey variants with different observation and action spaces, see th
## Description
-Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck".There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal.Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.
+Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck". There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal. Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner.
-There are no limits to how many points you can get per game, other than the time limit of 3-minute games.
-For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).
+## Rewards
+You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. There are no limits to how many points you can get per game, other than the time limit of 3-minute games. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).
## Variants
diff --git a/docs/environments/atari/jamesbond.md b/docs/environments/atari/jamesbond.md
index b67917e88..f717ae504 100644
--- a/docs/environments/atari/jamesbond.md
+++ b/docs/environments/atari/jamesbond.md
@@ -21,7 +21,7 @@ For more Jamesbond variants with different observation and action spaces, see th
## Description
-Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions.The craft moves forward with a right motion and slightly back with a left motion.An up or down motion causes the craft to jump or dive.You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.
+Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions. The craft moves forward with a right motion and slightly back with a left motion. An up or down motion causes the craft to jump or dive. You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score.
-There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007.
-For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250).
+## Rewards
+The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250).
## Variants
diff --git a/docs/environments/atari/journey_escape.md b/docs/environments/atari/journey_escape.md
index cbcd13bc0..251cf846b 100644
--- a/docs/environments/atari/journey_escape.md
+++ b/docs/environments/atari/journey_escape.md
@@ -21,7 +21,7 @@ For more JourneyEscape variants with different observation and action spaces, se
## Description
-You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out.You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.
+You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out. You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252)
@@ -50,11 +50,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-At the start of the game, you will have $50,000 and 60 units of time.
-Your end game score with be dependent on how much time you have remaining and who you encounter along the way.
-For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252).
+## Rewards
+At the start of the game, you will have $50,000 and 60 units of time. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252).
## Variants
diff --git a/docs/environments/atari/kaboom.md b/docs/environments/atari/kaboom.md
index a4933394d..8c6871d89 100644
--- a/docs/environments/atari/kaboom.md
+++ b/docs/environments/atari/kaboom.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Kaboom has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/kangaroo.md b/docs/environments/atari/kangaroo.md
index 2db659ce0..ba8e757e6 100644
--- a/docs/environments/atari/kangaroo.md
+++ b/docs/environments/atari/kangaroo.md
@@ -21,7 +21,7 @@ For more Kangaroo variants with different observation and action spaces, see the
## Description
-The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives.During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.
+The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives. During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-Your score will be shown at the top right corner of the game.
-Your end game score with be dependent on how much time you have remaining and who you encounter along the way.
-For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).
+## Rewards
+Your score will be shown at the top right corner of the game. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).
## Variants
diff --git a/docs/environments/atari/keystone_kapers.md b/docs/environments/atari/keystone_kapers.md
index 39cdc6e49..7d4f4e11d 100644
--- a/docs/environments/atari/keystone_kapers.md
+++ b/docs/environments/atari/keystone_kapers.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
KeystoneKapers has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/king_kong.md b/docs/environments/atari/king_kong.md
index 7e00cf31d..dcc776207 100644
--- a/docs/environments/atari/king_kong.md
+++ b/docs/environments/atari/king_kong.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
KingKong has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/klax.md b/docs/environments/atari/klax.md
index 62d7430cb..46af8805d 100644
--- a/docs/environments/atari/klax.md
+++ b/docs/environments/atari/klax.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Klax has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/koolaid.md b/docs/environments/atari/koolaid.md
index 21ef16c1d..fd07c8b1c 100644
--- a/docs/environments/atari/koolaid.md
+++ b/docs/environments/atari/koolaid.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Koolaid has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/krull.md b/docs/environments/atari/krull.md
index 94ebbefc9..234ce584a 100644
--- a/docs/environments/atari/krull.md
+++ b/docs/environments/atari/krull.md
@@ -21,7 +21,7 @@ For more Krull variants with different observation and action spaces, see the va
## Description
-Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast.The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.
+Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast. The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267)
@@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You will receive various scores for each monster you kill.
-You can play the game until you have lost all your lives.
-For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267).
+## Rewards
+You will receive various scores for each monster you kill. You can play the game until you have lost all your lives. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267).
## Variants
diff --git a/docs/environments/atari/kung_fu_master.md b/docs/environments/atari/kung_fu_master.md
index 1820a92ce..3c54193c9 100644
--- a/docs/environments/atari/kung_fu_master.md
+++ b/docs/environments/atari/kung_fu_master.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
KungFuMaster has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/laser_gates.md b/docs/environments/atari/laser_gates.md
index f28dd9d9f..8a363d5fa 100644
--- a/docs/environments/atari/laser_gates.md
+++ b/docs/environments/atari/laser_gates.md
@@ -21,7 +21,7 @@ For more LaserGates variants with different observation and action spaces, see t
## Description
-The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and deestroy the four Failsafe Detonators.
+The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and destroy the four Failsafe Detonators.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=271)
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
LaserGates has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/lost_luggage.md b/docs/environments/atari/lost_luggage.md
index 0c8419b2c..f9b46603c 100644
--- a/docs/environments/atari/lost_luggage.md
+++ b/docs/environments/atari/lost_luggage.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
LostLuggage has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/mario_bros.md b/docs/environments/atari/mario_bros.md
index 93b919c08..2a47c5c8b 100644
--- a/docs/environments/atari/mario_bros.md
+++ b/docs/environments/atari/mario_bros.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
MarioBros has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/miniature_golf.md b/docs/environments/atari/miniature_golf.md
index 8939fc114..59fe0b0ec 100644
--- a/docs/environments/atari/miniature_golf.md
+++ b/docs/environments/atari/miniature_golf.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
MiniatureGolf has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/montezuma_revenge.md b/docs/environments/atari/montezuma_revenge.md
index 42e4d978d..748c9681b 100644
--- a/docs/environments/atari/montezuma_revenge.md
+++ b/docs/environments/atari/montezuma_revenge.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
MontezumaRevenge has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/mr_do.md b/docs/environments/atari/mr_do.md
index c2292acbc..50900e745 100644
--- a/docs/environments/atari/mr_do.md
+++ b/docs/environments/atari/mr_do.md
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
MrDo has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/ms_pacman.md b/docs/environments/atari/ms_pacman.md
index 012dca5d0..716b39485 100644
--- a/docs/environments/atari/ms_pacman.md
+++ b/docs/environments/atari/ms_pacman.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
MsPacman has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/name_this_game.md b/docs/environments/atari/name_this_game.md
index 6daa449bf..ab825d665 100644
--- a/docs/environments/atari/name_this_game.md
+++ b/docs/environments/atari/name_this_game.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
NameThisGame has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/othello.md b/docs/environments/atari/othello.md
index edfaf4d89..9e0529617 100644
--- a/docs/environments/atari/othello.md
+++ b/docs/environments/atari/othello.md
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Othello has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/pacman.md b/docs/environments/atari/pacman.md
index 090700a4a..e593a66c9 100644
--- a/docs/environments/atari/pacman.md
+++ b/docs/environments/atari/pacman.md
@@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Pacman has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/phoenix.md b/docs/environments/atari/phoenix.md
index 464bbe8fc..6b693ede3 100644
--- a/docs/environments/atari/phoenix.md
+++ b/docs/environments/atari/phoenix.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Phoenix has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/pitfall.md b/docs/environments/atari/pitfall.md
index f5926ab18..bb1d7b54b 100644
--- a/docs/environments/atari/pitfall.md
+++ b/docs/environments/atari/pitfall.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You get score points for collecting treasure, you lose points through some misfortunes like falling down a hole.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360).
+## Rewards
+You get score points for collecting treasure, you lose points through some misfortunes like falling down a hole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360).
## Variants
diff --git a/docs/environments/atari/pitfall2.md b/docs/environments/atari/pitfall2.md
index 76ac25e47..68c8b0915 100644
--- a/docs/environments/atari/pitfall2.md
+++ b/docs/environments/atari/pitfall2.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Pitfall2 has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/pong.md b/docs/environments/atari/pong.md
index 6582dc0dc..b58686e91 100644
--- a/docs/environments/atari/pong.md
+++ b/docs/environments/atari/pong.md
@@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587).
+## Rewards
+You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587).
## Variants
diff --git a/docs/environments/atari/pooyan.md b/docs/environments/atari/pooyan.md
index cfd585118..55c763d98 100644
--- a/docs/environments/atari/pooyan.md
+++ b/docs/environments/atari/pooyan.md
@@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-If you hit a balloon, wolf or stone with an arrow you score points.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372).
+## Rewards
+If you hit a balloon, wolf or stone with an arrow you score points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372).
## Variants
diff --git a/docs/environments/atari/private_eye.md b/docs/environments/atari/private_eye.md
index 58a79fbad..d4c9e7db2 100644
--- a/docs/environments/atari/private_eye.md
+++ b/docs/environments/atari/private_eye.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376).
+## Rewards
+You score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376).
## Variants
diff --git a/docs/environments/atari/qbert.md b/docs/environments/atari/qbert.md
index 9e5966ded..126451688 100644
--- a/docs/environments/atari/qbert.md
+++ b/docs/environments/atari/qbert.md
@@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL).
+## Rewards
+You score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL).
## Variants
diff --git a/docs/environments/atari/riverraid.md b/docs/environments/atari/riverraid.md
index 753beeb24..29a4d5718 100644
--- a/docs/environments/atari/riverraid.md
+++ b/docs/environments/atari/riverraid.md
@@ -21,7 +21,7 @@ For more Riverraid variants with different observation and action spaces, see th
## Description
-You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards).You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots).The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.
+You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards). You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low. You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots). The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409)
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
+## Rewards
Score points are your only reward. You get score points each time you destroy an enemy object:
-
| Enemy Object | Score Points |
|--------------|--------------|
| Tanker | 30 |
@@ -60,8 +58,7 @@ Score points are your only reward. You get score points each time you destroy an
| Fuel Depot | 80 |
| Jet | 100 |
| Bridge | 500 |
-
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409).
+For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409).
## Variants
diff --git a/docs/environments/atari/road_runner.md b/docs/environments/atari/road_runner.md
index 993e56380..7121d3051 100644
--- a/docs/environments/atari/road_runner.md
+++ b/docs/environments/atari/road_runner.md
@@ -21,7 +21,7 @@ For more RoadRunner variants with different observation and action spaces, see t
## Description
-You control the Road Runner(TM) in a race; you can control the direction to run in and times to jumps.The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert.The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff,or you get hit by a falling rock.You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.
+You control the Road Runner(TM) in a race; you can control the direction to run in and when to jump. The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert. The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff, or you get hit by a falling rock. You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412)
@@ -49,18 +49,15 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
+## Rewards
Score points are your only reward. You get score points each time you:
-
| actions | points |
|-------------------------------------------------------|--------|
| eat a pile of birdseed | 100 |
| eat steel shot | 100 |
| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |
| get the coyote hit by a truck | 1000 |
-
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).
+For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).
## Variants
diff --git a/docs/environments/atari/robotank.md b/docs/environments/atari/robotank.md
index fb863a709..285b11d06 100644
--- a/docs/environments/atari/robotank.md
+++ b/docs/environments/atari/robotank.md
@@ -21,7 +21,7 @@ For more Robotank variants with different observation and action spaces, see the
## Description
-You control your Robot Tanks to destroy enemies and avoid enemy fire.Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed.The game begins with one active Robot Tank and three reserves.Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares).You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.
+You control your Robot Tanks to destroy enemies and avoid enemy fire. Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed. The game begins with one active Robot Tank and three reserves. Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares). You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416)
@@ -49,15 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-The number of enemies destroyed is the only reward.
-
-A small tank appears at the top of your screen for each enemy
- you destroy. A square with the number 12 appears each time a squadron of twelve enemies are
- destroyed.
-
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416).
+## Rewards
+The number of enemies destroyed is the only reward. A small tank appears at the top of your screen for each enemy you destroy. A square with the number 12 appears each time a squadron of twelve enemies is destroyed. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416).
## Variants
diff --git a/docs/environments/atari/seaquest.md b/docs/environments/atari/seaquest.md
index 32baf109a..57b1bb49d 100644
--- a/docs/environments/atari/seaquest.md
+++ b/docs/environments/atari/seaquest.md
@@ -21,7 +21,7 @@ For more Seaquest variants with different observation and action spaces, see the
## Description
-You control a sub able to move in all directions and fire torpedoes.The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly.The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to yourbase. You can only have six reserve subs on the screen at one time.Your sub will explode if it collides with anything except your own divers.The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well.
+You control a sub able to move in all directions and fire torpedoes. The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly. The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to your base. You can only have six reserve subs on the screen at one time. Your sub will explode if it collides with anything except your own divers. The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424)
@@ -49,22 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-Score points are your only reward.
-
-Blasting enemy sub and killer shark is worth
-20 points. Every time you surface with six divers, the value of enemy subs
-and killer sharks increases by 10, up to a maximum of 90 points each.
-
-Rescued divers start at 50 points each. Then, their point value increases by 50, every
-time you surface, up to a maximum of 1000 points each.
-
-You'll be further rewarded with bonus points for all the oxygen you have remaining the
-moment you surface. The more oxygen you have left, the more bonus points
-you're given.
-
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424).
+## Rewards
+Score points are your only reward. Blasting enemy sub and killer shark is worth 20 points. Every time you surface with six divers, the value of enemy subs and killer sharks increases by 10, up to a maximum of 90 points each. Rescued divers start at 50 points each. Then, their point value increases by 50, every time you surface, up to a maximum of 1000 points each. You'll be further rewarded with bonus points for all the oxygen you have remaining the moment you surface. The more oxygen you have left, the more bonus points you're given. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424).
## Variants
diff --git a/docs/environments/atari/sir_lancelot.md b/docs/environments/atari/sir_lancelot.md
index 40fec5bcd..6a5b23279 100644
--- a/docs/environments/atari/sir_lancelot.md
+++ b/docs/environments/atari/sir_lancelot.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
SirLancelot has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/skiing.md b/docs/environments/atari/skiing.md
index f76c027c3..ddc058bfc 100644
--- a/docs/environments/atari/skiing.md
+++ b/docs/environments/atari/skiing.md
@@ -21,7 +21,7 @@ For more Skiing variants with different observation and action spaces, see the v
## Description
-You control a skier who can move sideways.The goal is to run through all gates (between the poles) in the fastest time.You are penalized five seconds for each gate you miss.If you hit a gate or a tree, your skier will jump back up and keep going.
+You control a skier who can move sideways. The goal is to run through all gates (between the poles) in the fastest time. You are penalized five seconds for each gate you miss. If you hit a gate or a tree, your skier will jump back up and keep going.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434)
@@ -45,11 +45,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-Seconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds.
-
-For a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434).
+## Rewards
+Seconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434), in particular the Slalom Racing section.
## Variants
diff --git a/docs/environments/atari/solaris.md b/docs/environments/atari/solaris.md
index 70efcf09f..c70b5785d 100644
--- a/docs/environments/atari/solaris.md
+++ b/docs/environments/atari/solaris.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).
+## Rewards
+You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).
## Variants
diff --git a/docs/environments/atari/space_invaders.md b/docs/environments/atari/space_invaders.md
index 4edc82452..c04020ce2 100644
--- a/docs/environments/atari/space_invaders.md
+++ b/docs/environments/atari/space_invaders.md
@@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You gain points for destroying space invaders. The invaders in the back rows are worth more points.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460).
+## Rewards
+You gain points for destroying space invaders. The invaders in the back rows are worth more points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460).
## Variants
diff --git a/docs/environments/atari/space_war.md b/docs/environments/atari/space_war.md
index 0d0c83001..f943726a2 100644
--- a/docs/environments/atari/space_war.md
+++ b/docs/environments/atari/space_war.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
SpaceWar has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/star_gunner.md b/docs/environments/atari/star_gunner.md
index 1b9fac37e..14e00552d 100644
--- a/docs/environments/atari/star_gunner.md
+++ b/docs/environments/atari/star_gunner.md
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying enemies. You get bonus points for clearing a wave and a level.
-For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html).
+## Rewards
+You score points for destroying enemies. You get bonus points for clearing a wave and a level. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html).
## Variants
diff --git a/docs/environments/atari/superman.md b/docs/environments/atari/superman.md
index 2cd3bbb03..ff5678501 100644
--- a/docs/environments/atari/superman.md
+++ b/docs/environments/atari/superman.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Superman has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/surround.md b/docs/environments/atari/surround.md
index 8ece7ed0d..d013e0274 100644
--- a/docs/environments/atari/surround.md
+++ b/docs/environments/atari/surround.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Surround has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/tennis.md b/docs/environments/atari/tennis.md
index 9e5649692..66d459d3f 100644
--- a/docs/environments/atari/tennis.md
+++ b/docs/environments/atari/tennis.md
@@ -21,7 +21,7 @@ For more Tennis variants with different observation and action spaces, see the v
## Description
-You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis.The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.
+You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis. The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555)
@@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-The scoring is as per the sport of tennis, played till one set.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555).
+## Rewards
+The scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555).
## Variants
diff --git a/docs/environments/atari/tetris.md b/docs/environments/atari/tetris.md
index 80db410b2..56ca0e521 100644
--- a/docs/environments/atari/tetris.md
+++ b/docs/environments/atari/tetris.md
@@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Tetris has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/tic_tac_toe_3d.md b/docs/environments/atari/tic_tac_toe_3d.md
index 53343866e..bf9affa50 100644
--- a/docs/environments/atari/tic_tac_toe_3d.md
+++ b/docs/environments/atari/tic_tac_toe_3d.md
@@ -21,7 +21,7 @@ For more TicTacToe3D variants with different observation and action spaces, see
## Description
-Players take turns placing their mark (an X or an O) on a 3-dimensional, 4 x 4 x 4 grid in an attempt to get 4 in a row before their opponent does.
+Players take turns placing their mark (an X or an O) on a 3-dimensional, 4x4x4 grid in an attempt to get 4 in a row before their opponent does.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=521)
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
TicTacToe3D has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/time_pilot.md b/docs/environments/atari/time_pilot.md
index f215db8b7..ced2d2ebc 100644
--- a/docs/environments/atari/time_pilot.md
+++ b/docs/environments/atari/time_pilot.md
@@ -48,10 +48,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
-### Rewards
-
-You score points for destroying enemies, gaining more points for difficult enemies.
-For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html).
+## Rewards
+You score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html).
## Variants
diff --git a/docs/environments/atari/trondead.md b/docs/environments/atari/trondead.md
index 5f2f64e53..5f914a539 100644
--- a/docs/environments/atari/trondead.md
+++ b/docs/environments/atari/trondead.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Trondead has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/turmoil.md b/docs/environments/atari/turmoil.md
index f7383c7e3..3738406f4 100644
--- a/docs/environments/atari/turmoil.md
+++ b/docs/environments/atari/turmoil.md
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Turmoil has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/tutankham.md b/docs/environments/atari/tutankham.md
index 91bf4e411..fd45478bf 100644
--- a/docs/environments/atari/tutankham.md
+++ b/docs/environments/atari/tutankham.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Tutankham has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/up_n_down.md b/docs/environments/atari/up_n_down.md
index 8646c2c75..3d28fa900 100644
--- a/docs/environments/atari/up_n_down.md
+++ b/docs/environments/atari/up_n_down.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
UpNDown has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/venture.md b/docs/environments/atari/venture.md
index 139fb8081..493e19c6a 100644
--- a/docs/environments/atari/venture.md
+++ b/docs/environments/atari/venture.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Venture has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/video_checkers.md b/docs/environments/atari/video_checkers.md
index 1e8f3fa32..ee0aeb9be 100644
--- a/docs/environments/atari/video_checkers.md
+++ b/docs/environments/atari/video_checkers.md
@@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
VideoCheckers has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/video_chess.md b/docs/environments/atari/video_chess.md
index a0e446038..b8b15c8c7 100644
--- a/docs/environments/atari/video_chess.md
+++ b/docs/environments/atari/video_chess.md
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
VideoChess has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/video_cube.md b/docs/environments/atari/video_cube.md
index ebdae5d3a..59ba9465d 100644
--- a/docs/environments/atari/video_cube.md
+++ b/docs/environments/atari/video_cube.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
VideoCube has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/video_pinball.md b/docs/environments/atari/video_pinball.md
index c38724da0..fa01f8328 100644
--- a/docs/environments/atari/video_pinball.md
+++ b/docs/environments/atari/video_pinball.md
@@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
VideoPinball has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/wizard_of_wor.md b/docs/environments/atari/wizard_of_wor.md
index 6141b7d1c..804729346 100644
--- a/docs/environments/atari/wizard_of_wor.md
+++ b/docs/environments/atari/wizard_of_wor.md
@@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
WizardOfWor has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/word_zapper.md b/docs/environments/atari/word_zapper.md
index b10230d6c..d2e9f0a9c 100644
--- a/docs/environments/atari/word_zapper.md
+++ b/docs/environments/atari/word_zapper.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
WordZapper has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/yars_revenge.md b/docs/environments/atari/yars_revenge.md
index f35d6058b..d9b68b35c 100644
--- a/docs/environments/atari/yars_revenge.md
+++ b/docs/environments/atari/yars_revenge.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
YarsRevenge has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/atari/zaxxon.md b/docs/environments/atari/zaxxon.md
index 287d6b735..1c5df6e5e 100644
--- a/docs/environments/atari/zaxxon.md
+++ b/docs/environments/atari/zaxxon.md
@@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"`
See variants section for the type of observation used by each environment id by default.
+
## Variants
Zaxxon has the following variants of the environment id which have the following differences in observation,
diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md
index c55eacfd9..fa311901f 100644
--- a/docs/environments/mujoco.md
+++ b/docs/environments/mujoco.md
@@ -79,15 +79,22 @@ Gymnasium includes the following versions of the environments:
| ------- | --------------- | ------------------------------------------------ |
| `v5` | `mujoco=>2.3.3` | Recommended (most features, the least bugs) |
| `v4` | `mujoco=>2.1.3` | Maintained for reproducibility |
-| `v3` | `mujoco-py` | Maintained for reproducibility (limited support) |
-| `v2` | `mujoco-py` | Maintained for reproducibility (limited support) |
+| `v3` | `mujoco-py` | Deprecated, kept for reproducibility (limited support) |
+| `v2` | `mujoco-py` | Deprecated, kept for reproducibility (limited support) |
For more information, see the section "Version History" for each environment.
`v1` and older are no longer included in Gymnasium.
-Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information of their [Documentation]( https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility))
+### Comparing training performance across versions
+The training performance of `v2` and `v3` is identical assuming the same/default arguments were used.
+The training performance of `v2`/`v3` and `v4` is not directly comparable because of the change to the newer simulator, but the results for environments other than Ant and Humanoid are comparable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)).
+
+The training performance of `v4` and `v5` is different because of the many changes in the environments, but the Half Cheetah and Swimmer exhibit identical behaviour, and Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
+
+### Exact reproducibility
+Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information in their [Documentation](https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)). If exact reproducibility is needed, then in addition to using the `seed` for experiments, the same simulator version should be used.
## Rendering Arguments
The all MuJoCo Environments besides the general Gymnasium arguments, and environment specific arguments they also take the following arguments for configuring the renderer:
diff --git a/docs/tutorials/gymnasium_basics/load_quadruped_model.md b/docs/tutorials/gymnasium_basics/load_quadruped_model.md
new file mode 100644
index 000000000..d30e85bf0
--- /dev/null
+++ b/docs/tutorials/gymnasium_basics/load_quadruped_model.md
@@ -0,0 +1,247 @@
+Load custom quadruped robot environments
+================================
+
+In this tutorial we will see how to use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment, using a model file (ending in `.xml`) without having to create a new class.
+
+Steps:
+
+0. Get your **MJCF** (or **URDF**) model file of your robot.
+ - Create your own model (see the [Guide](https://mujoco.readthedocs.io/en/stable/modeling.html)) or,
+ - Find a ready-made model (in this tutorial, we will use a model from the [**MuJoCo Menagerie**](https://github.com/google-deepmind/mujoco_menagerie) collection).
+1. Load the model with the `xml_file` argument.
+2. Tweak the environment parameters to get the desired behavior.
+ 1. Tweak the environment simulation parameters.
+ 2. Tweak the environment termination parameters.
+ 3. Tweak the environment reward parameters.
+ 4. Tweak the environment observation parameters.
+3. Train an agent to move your robot.
+
+
+The reader is expected to be familiar with the `Gymnasium` API & library, the basics of robotics, and the included `Gymnasium/MuJoCo` environments with the robot model they use. Familiarity with the **MJCF** file model format and the `MuJoCo` simulator is not required but is recommended.
+
+Setup
+------
+We will need `gymnasium>=1.0.0`.
+
+```sh
+pip install "gymnasium>=1.0.0"
+```
+
+Step 0.1 - Download a Robot Model
+-------------------------
+In this tutorial we will load the [Unitree Go1](
+https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/README.md) robot from the excellent [MuJoCo Menagerie](https://github.com/google-deepmind/mujoco_menagerie) robot model collection.
+![Unitree Go1 robot in a flat terrain scene](https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/go1.png?raw=true)
+
+`Go1` is a quadruped robot, controlling it to move is a significant learning problem, much harder than the `Gymnasium/MuJoCo/Ant` environment.
+
+We can download the whole MuJoCo Menagerie collection (which includes `Go1`),
+```sh
+git clone https://github.com/google-deepmind/mujoco_menagerie.git
+```
+You can use any other quadruped robot with this tutorial, just adjust the environment parameter values for your robot.
+
+
+Step 1 - Load the model
+-------------------------
+To load the model, all we have to do is use the `xml_file` argument with the `Ant-v5` framework.
+
+```py
+import gymnasium
+import numpy as np
+env = gymnasium.make('Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml')
+```
+
+Although this is enough to load the model, we will need to tweak some environment parameters to get the desired behavior for our environment, for now we will also explicitly set the simulation, termination, reward and observation arguments, which we will tweak in the next step.
+
+```py
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=0,
+ ctrl_cost_weight=0,
+ contact_cost_weight=0,
+ healthy_reward=0,
+ main_body=1,
+ healthy_z_range=(0, np.inf),
+ include_cfrc_ext_in_observation=True,
+ exclude_current_positions_from_observation=False,
+ reset_noise_scale=0,
+ frame_skip=1,
+ max_episode_steps=1000,
+)
+```
+
+
+Step 2 - Tweaking the Environment Parameters
+-------------------------
+Tweaking the environment parameters is essential to get the desired behavior for learning.
+In the following subsections, the reader is encouraged to consult the [documentation of the arguments](https://gymnasium.farama.org/main/environments/mujoco/ant/#arguments) for more detailed information.
+
+
+
+Step 2.1 - Tweaking the Environment Simulation Parameters
+-------------------------
+The arguments of interest are `frame_skip`, `reset_noise_scale` and `max_episode_steps`.
+
+We want to tweak the `frame_skip` parameter to get `dt` to an acceptable value (typical values are `dt` $\in [0.01, 0.1]$ seconds),
+
+Reminder: $dt = frame\_skip \times model.opt.timestep$, where `model.opt.timestep` is the integrator time step selected in the MJCF model file.
+
+The `Go1` model we are using has an integrator timestep of `0.002`, so by selecting `frame_skip=25` we can set the value of `dt` to `0.05s`.
+
+To avoid overfitting the policy, `reset_noise_scale` should be set to a value appropriate to the size of the robot, we want the value to be as large as possible without the initial distribution of states being invalid (`Terminal` regardless of control actions), for `Go1` we choose a value of `0.1`.
+
+And `max_episode_steps` determines the number of steps per episode before `truncation`, here we set it to 1000 to be consistent with the base `Gymnasium/MuJoCo` environments, but if you need something higher you can set it so.
+
+
+```py
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=0,
+ ctrl_cost_weight=0,
+ contact_cost_weight=0,
+ healthy_reward=0,
+ main_body=1,
+ healthy_z_range=(0, np.inf),
+ include_cfrc_ext_in_observation=True,
+ exclude_current_positions_from_observation=False,
+ reset_noise_scale=0.1, # set to avoid policy overfitting
+ frame_skip=25, # set dt=0.05
+ max_episode_steps=1000, # kept at 1000
+)
+```
+
+
+Step 2.2 - Tweaking the Environment Termination Parameters
+-------------------------
+Termination is important for robot environments to avoid sampling "useless" time steps.
+
+The arguments of interest are `terminate_when_unhealthy` and `healthy_z_range`.
+
+We want to set `healthy_z_range` to terminate the environment when the robot falls over, or jumps really high, here we have to choose a value that is logical for the height of the robot, for `Go1` we choose `(0.195, 0.75)`.
+Note: `healthy_z_range` checks the absolute value of the height of the robot, so if your scene contains different levels of elevation it should be set to `(-np.inf, np.inf)`
+
+We could also set `terminate_when_unhealthy=False` to disable termination altogether, which is not desirable in the case of `Go1`.
+
+```py
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=0,
+ ctrl_cost_weight=0,
+ contact_cost_weight=0,
+ healthy_reward=0,
+ main_body=1,
+ healthy_z_range=(0.195, 0.75), # set to avoid sampling steps where the robot has fallen or jumped too high
+ include_cfrc_ext_in_observation=True,
+ exclude_current_positions_from_observation=False,
+ reset_noise_scale=0.1,
+ frame_skip=25,
+ max_episode_steps=1000,
+)
+```
+
+Note: If you need a different termination condition, you can write your own `TerminationWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/)).
+
+
+
+Step 2.3 - Tweaking the Environment Reward Parameters
+-------------------------
+The arguments of interest are `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight`, `healthy_reward`, and `main_body`.
+
+For the arguments `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight` and `healthy_reward` we have to pick values that make sense for our robot, you can use the default `MuJoCo/Ant` parameters for references and tweak them if a change is needed for your environment. In the case of `Go1` we only change the `ctrl_cost_weight` since it has a higher actuator force range.
+
+For the argument `main_body` we have to choose which body part is the main body (usually called something like "torso" or "trunk" in the model file) for the calculation of the `forward_reward`, in the case of `Go1` it is the `"trunk"` (Note: in most cases including this one, it can be left at the default value).
+
+```py
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=1, # kept the same as the 'Ant' environment
+ ctrl_cost_weight=0.05, # changed because of the stronger motors of `Go1`
+ contact_cost_weight=5e-4, # kept the same as the 'Ant' environment
+ healthy_reward=1, # kept the same as the 'Ant' environment
+ main_body=1, # represents the "trunk" of the `Go1` robot
+ healthy_z_range=(0.195, 0.75),
+ include_cfrc_ext_in_observation=True,
+ exclude_current_positions_from_observation=False,
+ reset_noise_scale=0.1,
+ frame_skip=25,
+ max_episode_steps=1000,
+)
+```
+
+Note: If you need a different reward function, you can write your own `RewardWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/reward_wrappers/)).
+
+
+
+Step 2.4 - Tweaking the Environment Observation Parameters
+-------------------------
+The arguments of interest are `include_cfrc_ext_in_observation` and `exclude_current_positions_from_observation`.
+
+Here for `Go1` we have no particular reason to change them.
+
+```py
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=1,
+ ctrl_cost_weight=0.05,
+ contact_cost_weight=5e-4,
+ healthy_reward=1,
+ main_body=1,
+ healthy_z_range=(0.195, 0.75),
+ include_cfrc_ext_in_observation=True, # kept the same as the 'Ant' environment
+ exclude_current_positions_from_observation=False, # kept the same as the 'Ant' environment
+ reset_noise_scale=0.1,
+ frame_skip=25,
+ max_episode_steps=1000,
+)
+```
+
+
+Note: If you need additional observation elements (such as additional sensors), you can write your own `ObservationWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/observation_wrappers/)).
+
+
+
+Step 3 - Train your Agent
+-------------------------
+Finally, we are done: we can use an RL algorithm to train an agent to walk/run the `Go1` robot.
+Note: If you have followed this guide with your own robot model, you may discover during training that some environment parameters were not as desired, feel free to go back to step 2 and change anything as needed.
+
+```py
+import gymnasium
+
+env = gymnasium.make(
+ 'Ant-v5',
+ xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
+ forward_reward_weight=1,
+ ctrl_cost_weight=0.05,
+ contact_cost_weight=5e-4,
+ healthy_reward=1,
+ main_body=1,
+ healthy_z_range=(0.195, 0.75),
+ include_cfrc_ext_in_observation=True,
+ exclude_current_positions_from_observation=False,
+ reset_noise_scale=0.1,
+ frame_skip=25,
+ max_episode_steps=1000,
+)
+... # run your RL algorithm
+```
+![image](https://github.com/Kallinteris-Andreas/Gymnasium-kalli/assets/30759571/bf1797a3-264d-47de-b14c-e3c16072f695)
+
+
+
+
+
+Epilogue
+-------------------------
+You can follow this guide to create most quadruped environments.
+To create humanoid/bipedal robots, you can also follow this guide using the `Gymnasium/MuJoCo/Humanoid-v5` framework.
+
+Author: [@kallinteris-andreas](https://github.com/Kallinteris-Andreas)
diff --git a/docs/tutorials/third-party-tutorials.md b/docs/tutorials/third-party-tutorials.md
index 2883cb75e..5fc792957 100644
--- a/docs/tutorials/third-party-tutorials.md
+++ b/docs/tutorials/third-party-tutorials.md
@@ -1,7 +1,16 @@
-
# Third-Party Tutorials
-## [AgileRL](https://docs.agilerl.com/en/latest/tutorials/gymnasium/index.html)
+
+*This page contains tutorials which are not maintained by Farama Foundation and, as such, cannot be guaranteed to function as intended.*
+
+*If you'd like to contribute a tutorial, please reach out on [Discord](https://discord.gg/bnJ6kubTg6).*
+
+
+## Third-Party tutorials about gymnasium
+
+## Third-Party tutorials about libraries that use gymnasium
+
+### [AgileRL](https://docs.agilerl.com/en/latest/tutorials/gymnasium/index.html)
AgileRL focuses on reducing the time taken for training models and hyperparameter optimisation (HPO) providing tutorials for using it with PPO, TD3 and Rainbow.
diff --git a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
index 355699b75..3312f2873 100644
--- a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
+++ b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
@@ -33,11 +33,12 @@
``env.step(A)`` allows us to take an action 'A' in the current environment 'env'. The environment then executes the action
and returns five variables:
-- ``next_obs``: This is the observation that the agent will receive after taking the action.
-- ``reward``: This is the reward that the agent will receive after taking the action.
-- ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated.
-- ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached.
-- ``info``: This is a dictionary that might contain additional information about the environment.
+ - ``next_obs``: This is the observation that the agent will receive after taking the action.
+ - ``reward``: This is the reward that the agent will receive after taking the action.
+ - ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated.
+ - ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached.
+ - ``info``: This is a dictionary that might contain additional information about the environment.
+
"""
from __future__ import annotations
diff --git a/gymnasium/__init__.py b/gymnasium/__init__.py
index 8f1e28ad4..21218046c 100644
--- a/gymnasium/__init__.py
+++ b/gymnasium/__init__.py
@@ -52,7 +52,7 @@
"logger",
"functional",
]
-__version__ = "1.0.0rc1"
+__version__ = "1.0.0a1"
# Initializing pygame initializes audio connections through SDL. SDL uses alsa by default on all Linux systems
diff --git a/gymnasium/core.py b/gymnasium/core.py
index e7bea4aca..304dad709 100644
--- a/gymnasium/core.py
+++ b/gymnasium/core.py
@@ -66,6 +66,8 @@ class Env(Generic[ObsType, ActType]):
# Created
_np_random: np.random.Generator | None = None
+ # will be set to the "invalid" value -1 if the seed of the currently set rng is unknown
+ _np_random_seed: int | None = None
def step(
self, action: ActType
@@ -90,7 +92,7 @@ def step(
reward (SupportsFloat): The reward as a result of taking the action.
terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task)
which can be positive or negative. An example is reaching the goal state or moving into the lava from
- the Sutton and Barton, Gridworld. If true, the user needs to call :meth:`reset`.
+ the Sutton and Barto Gridworld. If true, the user needs to call :meth:`reset`.
truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied.
Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds.
Can be used to end the episode prematurely before a terminal state is reached.
@@ -130,10 +132,12 @@ def reset(
The ``return_info`` parameter was removed and now info is expected to be returned.
Args:
- seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`).
+ seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`) and
+ the read-only attribute `np_random_seed`.
If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
- However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
+ However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset
+ and the env's :attr:`np_random_seed` will *not* be altered.
If you pass an integer, the PRNG will be reset even if it already exists.
Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
Please refer to the minimal example above to see this paradigm in action.
@@ -148,7 +152,7 @@ def reset(
"""
# Initialize the RNG if the seed is manually passed
if seed is not None:
- self._np_random, seed = seeding.np_random(seed)
+ self._np_random, self._np_random_seed = seeding.np_random(seed)
def render(self) -> RenderFrame | list[RenderFrame] | None:
"""Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment.
@@ -201,6 +205,20 @@ def unwrapped(self) -> Env[ObsType, ActType]:
"""
return self
+ @property
+ def np_random_seed(self) -> int:
+ """Returns the environment's internal :attr:`_np_random_seed` that if not set will first initialise with a random int as seed.
+
+ If :attr:`np_random` was set directly instead of through :meth:`reset` or :meth:`set_np_random_through_seed`,
+ the seed will take the value -1.
+
+ Returns:
+ int: the seed of the current `np_random` or -1, if the seed of the rng is unknown
+ """
+ if self._np_random_seed is None:
+ self._np_random, self._np_random_seed = seeding.np_random()
+ return self._np_random_seed
+
@property
def np_random(self) -> np.random.Generator:
"""Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed.
@@ -209,12 +227,20 @@ def np_random(self) -> np.random.Generator:
Instances of `np.random.Generator`
"""
if self._np_random is None:
- self._np_random, _ = seeding.np_random()
+ self._np_random, self._np_random_seed = seeding.np_random()
return self._np_random
@np_random.setter
def np_random(self, value: np.random.Generator):
+ """Sets the environment's internal :attr:`_np_random` with the user-provided Generator.
+
+ Since it is generally not possible to extract a seed from an instance of a random number generator,
+ this will also set the :attr:`_np_random_seed` to `-1`, which is not valid as input for the creation
+ of a numpy rng.
+ """
self._np_random = value
+ # Setting a numpy rng with -1 will cause a ValueError
+ self._np_random_seed = -1
def __str__(self):
"""Returns a string of the environment with :attr:`spec` id's if :attr:`spec.
@@ -303,6 +329,11 @@ def close(self):
"""Closes the wrapper and :attr:`env`."""
return self.env.close()
+ @property
+ def np_random_seed(self) -> int | None:
+ """Returns the base environment's :attr:`np_random_seed`."""
+ return self.env.np_random_seed
+
@property
def unwrapped(self) -> Env[ObsType, ActType]:
"""Returns the base environment of the wrapper.
diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py
index 14ad83164..ae4dd7b0b 100644
--- a/gymnasium/envs/box2d/bipedal_walker.py
+++ b/gymnasium/envs/box2d/bipedal_walker.py
@@ -142,11 +142,15 @@ class BipedalWalker(gym.Env, EzPickle):
if the walker exceeds the right end of the terrain length.
## Arguments
- To use the _hardcore_ environment, you need to specify the
- `hardcore=True` argument like below:
+
+ To use the _hardcore_ environment, you need to specify the `hardcore=True`:
+
```python
- import gymnasium as gym
- env = gym.make("BipedalWalker-v3", hardcore=True)
+ >>> import gymnasium as gym
+ >>> env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
+ >>> env
+