diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4899d2b0..b38fd2834 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,6 +65,6 @@ repos: language: node pass_filenames: false types: [python] - additional_dependencies: ["pyright"] + additional_dependencies: ["pyright@1.1.347"] args: - --project=pyproject.toml diff --git a/README.md b/README.md index 9ebd08f57..9604e2470 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ -[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8127025.svg)](https://doi.org/10.5281/zenodo.8127025) - +[![Python](https://img.shields.io/pypi/pyversions/gymnasium.svg)](https://badge.fury.io/py/gymnasium) +[![PyPI](https://badge.fury.io/py/gymnasium.svg)](https://badge.fury.io/py/gymnasium) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8127025.svg)](https://doi.org/10.5281/zenodo.8127025) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
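A note on the pin above: without a version suffix, pre-commit resolves the newest `pyright` release from npm on every fresh hook install, so local runs and CI can silently diverge; `pyright@1.1.347` freezes the type checker. Below is a minimal sketch of the resulting hook entry, assuming a `repo: local` hook whose `id`/`name`/`entry` fields are illustrative (only the fields visible in the hunk above come from this diff):

```yaml
# Hypothetical local pyright hook; the pinned dependency plus the
# language/pass_filenames/types/args fields mirror the hunk above.
- repo: local
  hooks:
    - id: pyright
      name: pyright
      entry: pyright
      language: node                                # installed via npm by pre-commit
      pass_filenames: false                         # pyright discovers files from the project config
      types: [python]                               # only trigger on Python changes
      additional_dependencies: ["pyright@1.1.347"]  # pin so npm does not float to latest
      args:
        - --project=pyproject.toml
```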

diff --git a/docs/_scripts/atari-docs.json b/docs/_scripts/atari-docs.json index 16fece669..63962bba8 100644 --- a/docs/_scripts/atari-docs.json +++ b/docs/_scripts/atari-docs.json @@ -12,32 +12,32 @@ "alien": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=815", "env_description": "You are stuck in a maze-like space ship with three aliens. You goal is to destroy their eggs that are scattered all over the ship while simultaneously avoiding the aliens (they are trying to kill you). You have a flamethrower that can help you turn them away in tricky situations. Moreover, you can occasionally collect a power-up (pulsar) that gives you the temporary ability to kill aliens.", - "reward_description": "### Rewards\n\nYou score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught\nby an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a\ntable of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).\n" + "reward_description": "## Rewards\nYou score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a table of scores corresponding to the different achievements, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815)." }, "amidar": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=817", "env_description": "This game is similar to Pac-Man: You are trying to visit all places on a 2-dimensional grid while simultaneously avoiding your enemies. You can turn the tables at one point in the game: Your enemies turn into chickens and you can catch them.", - "reward_description": "### Rewards\n\nYou score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).\n" + "reward_description": "## Rewards\nYou score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817)." }, "assault": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=827", - "env_description": "You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones.You must destroy these enemies and dodge their attacks.", + "env_description": "You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones. You must destroy these enemies and dodge their attacks.", "reward_description": "" }, "asterix": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=3325", "env_description": "You are Asterix and can move horizontally (continuously) and vertically (discretely). Objects move horizontally across the screen: lyres and other (more useful) objects. Your goal is to guideAsterix in such a way as to avoid lyres and collect as many other objects as possible. You score points by collecting objects and lose a life whenever you collect a lyre. You have three lives available at the beginning. 
If you score sufficiently many points, you will be awarded additional points.", - "reward_description": "### Rewards\n\nA table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).\n" + "reward_description": "## Rewards\nA table of scores awarded for collecting the different objects is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325)." }, "asteroids": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=828", "env_description": "This is a well-known arcade game: You control a spaceship in an asteroid field and must break up asteroids by shooting them. Once all asteroids are destroyed, you enter a new level and new asteroids will appear. You will occasionally be attacked by a flying saucer.", - "reward_description": "### Rewards\n\nYou score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score\nfor destroying it.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL).\n" + "reward_description": "## Rewards\nYou score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score for destroying it. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL)." }, "atlantis": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=835", - "env_description": "Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts.You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought of a wave of enemies and scored a sufficient number of points.", - "reward_description": "### Rewards\n\nYou score points for destroying enemies, keeping installations protected during attack waves. You score more points\nif you manage to destroy your enemies with one of the outer defense posts.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).\n" + "env_description": "Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts. You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought off a wave of enemies and scored a sufficient number of points.", + "reward_description": "## Rewards\nYou score points for destroying enemies, keeping installations protected during attack waves. You score more points if you manage to destroy your enemies with one of the outer defense posts. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835)." }, "atlantis2": { "atariage_url": "", @@ -51,28 +51,28 @@ }, "bank_heist": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=1008", - "env_description": "You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. 
You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city.At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police,or run over the dynamite you have previously dropped.", - "reward_description": "### Rewards\n\nYou score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city,\nyou will score extra points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).\n" + "env_description": "You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chase you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city. At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police, or run over the dynamite you have previously dropped.", + "reward_description": "## Rewards\nYou score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, you will score extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008)." }, "basic_math": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=14", - "env_description": "You must solve basic math problems using a joystick\nto scroll to the correct numeric answer.", + "env_description": "You must solve basic math problems using a joystick to scroll to the correct numeric answer.", "reward_description": "" }, "battle_zone": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=859", "env_description": "You control a tank and must destroy enemy vehicles. This game is played in a first-person perspective and creates a 3D illusion. A radar screen shows enemies around you. You start with 5 lives and gain up to 2 extra lives if you reach a sufficient score.", - "reward_description": "### Rewards\n\nYou receive points for destroying enemies.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL).\n" + "reward_description": "## Rewards\nYou receive points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL)." }, "beam_rider": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=860", "env_description": "You control a space-ship that travels forward at a constant speed. You can only steer it sideways between discrete positions. Your goal is to destroy enemy ships, avoid their attacks and dodge space debris.", - "reward_description": "### Rewards\n\nYou score points for destroying enemies.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL).\n" + "reward_description": "## Rewards\nYou score points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL)." 
}, "berzerk": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=866", - "env_description": "You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode.You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.", - "reward_description": "### Rewards\n\nYou score points for destroying robots.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL).\n" + "env_description": "You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode. You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.", + "reward_description": "## Rewards\nYou score points for destroying robots. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL)." }, "blackjack": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=34", @@ -82,22 +82,22 @@ "bowling": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=879", "env_description": "Your goal is to score as many points as possible in the game of Bowling. A game consists of 10 frames and you have two tries per frame. Knocking down all pins on the first try is called a \"strike\". Knocking down all pins on the second roll is called a \"spar\". Otherwise, the frame is called \"open\".", - "reward_description": "### Rewards\n\nYou receive points for knocking down pins. The exact score depends on whether you manage a \"strike\", \"spare\" or \"open\"\nframe. Moreover, the points you score for one frame may depend on following frames.\nYou can score up to 300 points in one game (if you manage to do 12 strikes).\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879).\n" + "reward_description": "## Rewards\nYou receive points for knocking down pins. The exact score depends on whether you manage a \"strike\", \"spare\" or \"open\" frame. Moreover, the points you score for one frame may depend on following frames. You can score up to 300 points in one game (if you manage to do 12 strikes). For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879)." }, "boxing": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=882", "env_description": "You fight an opponent in a boxing ring. You score points for hitting the opponent. If you score 100 points, your opponent is knocked out.", - "reward_description": "### Rewards\n\nYou score points by landing punches.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).\n" + "reward_description": "## Rewards\nYou score points by landing punches. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882)." }, "breakout": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=889", "env_description": "Another famous Atari game. 
The dynamics are similar to pong: You move a paddle and hit the ball in a brick wall at the top of the screen. Your goal is to destroy the brick wall. You can try to break through the wall and let the ball wreak havoc on the other side, all on its own! You have five lives.", - "reward_description": "### Rewards\n\nYou score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).\n" + "reward_description": "## Rewards\nYou score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889)." }, "carnival": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=908", "env_description": "This is a \"shoot 'em up\" game. Targets move horizontally across the screen and you must shoot them. You are in control of a gun that can be moved horizontally. The supply of ammunition is limited and chickens may steal some bullets from you if you don't hit them in time.", - "reward_description": "### Rewards\n\nYou score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign.\nYou will score extra points if it shows a plus sign!\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).\n" + "reward_description": "## Rewards\nYou score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. You will score extra points if it shows a plus sign! For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908)." }, "casino": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=70", @@ -107,17 +107,17 @@ "centipede": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=911", "env_description": "You are an elf and must use your magic wands to fend off spiders, fleas and centipedes. Your goal is to protect mushrooms in an enchanted forest. If you are bitten by a spider, flea or centipede, you will be temporally paralyzed and you will lose a magic wand. The game ends once you have lost all wands. You may receive additional wands after scoring a sufficient number of points.", - "reward_description": "### Rewards\n\nYou score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round\n(i.e. after you have lost a wand) for mushrooms that were not destroyed.\nDetailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).\n" + "reward_description": "## Rewards\nYou score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round (i.e. after you have lost a wand) for mushrooms that were not destroyed. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911)." }, "chopper_command": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=921", - "env_description": "You control a helicopter and must protect truck convoys. 
To that end, you need to shoot down enemy aircraft.A mini-map is displayed at the bottom of the screen.", - "reward_description": "### Rewards\n\nYou score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number\nof trucks that have survived.\nDetailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).\n" + "env_description": "You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft. A mini-map is displayed at the bottom of the screen.", + "reward_description": "## Rewards\nYou score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number of trucks that have survived. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921)." }, "crazy_climber": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=113", "env_description": "You are a climber trying to reach the top of four buildings, while avoiding obstacles like closing windows and falling objects. When you receive damage (windows closing or objects) you will fall and lose one life; you have a total of 5 lives before the end games. At the top of each building, there's a helicopter which you need to catch to get to the next building. The goal is to climb as fast as possible while receiving the least amount of damage.", - "reward_description": "### Rewards\n\nA table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).\n" + "reward_description": "## Rewards\nA table of scores awarded for completing each row of a building is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113)." }, "crossbow": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=115", @@ -131,13 +131,13 @@ }, "defender": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=128", - "env_description": "Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids.You have three lives and three smart bombs. You lose a live when you are shot down by an alien spaceship.Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.", - "reward_description": "### Rewards\n\nYou receive points for destroying enemies, rescuing abducted humans and keeping humans alive.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128).\n" + "env_description": "Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids. You have three lives and three smart bombs. You lose a life when you are shot down by an alien spaceship. Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles.", + "reward_description": "## Rewards\nYou receive points for destroying enemies, rescuing abducted humans and keeping humans alive. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128)." 
}, "demon_attack": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=135", "env_description": "You are facing waves of demons in the ice planet of Krybor. Points are accumulated by destroying demons. You begin with 3 reserve bunkers, and can increase its number (up to 6) by avoiding enemy attacks. Each attack wave you survive without any hits, grants you a new bunker. Every time an enemy hits you, a bunker is destroyed. When the last bunker falls, the next enemy hit will destroy you and the game ends.", - "reward_description": "### Rewards\n\nEach enemy you slay gives you points. The amount of points depends on the type of demon and which\nwave you are in. A detailed table of scores is provided on [the AtariAge\npage](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).\n" + "reward_description": "## Rewards\nEach enemy you slay gives you points. The amount of points depends on the type of demon and which wave you are in. A detailed table of scores is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135)." }, "donkey_kong": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=149", @@ -147,7 +147,7 @@ "double_dunk": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=153", "env_description": "You are playing a 2v2 game of basketball. At the start of each possession, you select between a set of different plays and then execute them to either score or prevent your rivals from scoring.", - "reward_description": "### Rewards\n\nScores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending\nfrom where you shoot. After a defensive foul, a successful shot from the foul line gives you 1\npoint.\n" + "reward_description": "## Rewards\nScores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending from where you shoot. After a defensive foul, a successful shot from the foul line gives you 1 point." }, "earthworld": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=541", @@ -156,13 +156,13 @@ }, "elevator_action": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=1131", - "env_description": "You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents.This is an unreleased prototype based on the arcade game.", - "reward_description": "### Rewards\n\nYou start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each\nsecret document collected (visiting a red door). Each time you get shot you lose one life and the\ngame ends when losing all lives.\n" + "env_description": "You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents. 
This is an unreleased prototype based on the arcade game.", + "reward_description": "## Rewards\nYou start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each secret document collected (visiting a red door). Each time you get shot you lose one life, and the game ends when all lives are lost." }, "enduro": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=163", "env_description": "You are a racer in the National Enduro, a long-distance endurance race. You must overtake a certain amount of cars each day to stay on the race. The first day you need to pass 200 cars, and 300 foreach following day. The game ends if you do not meet your overtake quota for the day.", - "reward_description": "### Rewards\n\nYou get 1 point for each vehicle you overtake.\n" + "reward_description": "## Rewards\nYou get 1 point for each vehicle you overtake." }, "entombed": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=165", @@ -177,7 +177,7 @@ "fishing_derby": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=182", "env_description": "Your objective is to catch more sunfish than your opponent.", - "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)." + "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182)." }, "flag_capture": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=183", @@ -187,7 +187,7 @@ "freeway": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=192", "env_description": "Your objective is to guide your chicken across lane after lane of busy rush hour traffic. You receive a point for every chicken that makes it to the top of the screen after crossing all the lanes of traffic.", - "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)." + "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192)." }, "frogger": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=194", @@ -197,7 +197,7 @@ "frostbite": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=199", "env_description": "In Frostbite, the player controls \"Frostbite Bailey\" who hops back and forth across across an Arctic river, changing the color of the ice blocks from white to blue. Each time he does so, a block is added to his igloo.", - "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. 
You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)." + "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199)." }, "galaxian": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=202", @@ -207,12 +207,12 @@ "gopher": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=218", "env_description": "The player controls a shovel-wielding farmer who protects a crop of three carrots from a gopher.", - "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)." + "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218)." }, "gravitar": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=223", "env_description": "The player controls a small blue spacecraft. The game starts in a fictional solar system with several planets to explore. If the player moves his ship into a planet, he will be taken to a side-view landscape.", - "reward_description": "### Rewards\n\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can\nfind these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).\n\nAtari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1)." + "reward_description": "## Rewards\nThe exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223)." }, "hangman": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=231", @@ -227,7 +227,7 @@ "hero": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=228", "env_description": "You need to rescue miners that are stuck in a mine shaft. You have access to various tools: A propeller backpack that allows you to fly wherever you want, sticks of dynamite that can be used to blast through walls, a laser beam to kill vermin, and a raft to float across stretches of lava.You have a limited amount of power. Once you run out, you lose a live.", - "reward_description": "### Rewards\n\nYou score points for shooting critters, rescuing miners, and dynamiting walls.\nExtra points are rewarded for any power remaining after rescuing a miner.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).\n" + "reward_description": "## Rewards\nYou score points for shooting critters, rescuing miners, and dynamiting walls. Extra points are awarded for any power remaining after rescuing a miner. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228)." 
}, "human_cannonball": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=238", @@ -236,18 +236,18 @@ }, "ice_hockey": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=241", - "env_description": "Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called \"the puck\".There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal.Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.", - "reward_description": "### Rewards\n\nYou score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner.\nThere are no limits to how many points you can get per game, other than the time limit of 3-minute games.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).\n" + "env_description": "Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called \"the puck\".There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal. Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.", + "reward_description": "## Rewards\nYou score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. There are no limits to how many points you can get per game, other than the time limit of 3-minute games.For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241)." }, "jamesbond": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=250", - "env_description": "Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions.The craft moves forward with a right motion and slightly back with a left motion.An up or down motion causes the craft to jump or dive.You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.", - "reward_description": "### Rewards\n\nThe game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score.\nThere will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250).\n" + "env_description": "Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions. The craft moves forward with a right motion and slightly back with a left motion. An up or down motion causes the craft to jump or dive. You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.", + "reward_description": "## Rewards\nThe game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. 
For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250)." }, "journey_escape": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=252", - "env_description": "You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out.You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.", - "reward_description": "### Rewards\n\nAt the start of the game, you will have $50,000 and 60 units of time.\nYour end game score with be dependent on how much time you have remaining and who you encounter along the way.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252).\n" + "env_description": "You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out. You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.", + "reward_description": "## Rewards\nAt the start of the game, you will have $50,000 and 60 units of time. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252)." }, "kaboom": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=257", @@ -256,8 +256,8 @@ }, "kangaroo": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=923", - "env_description": "The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives.During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.", - "reward_description": "### Rewards\n\nYour score will be shown at the top right corner of the game.\nYour end game score with be dependent on how much time you have remaining and who you encounter along the way.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).\n" + "env_description": "The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives. During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.", + "reward_description": "## Rewards\nYour score will be shown at the top right corner of the game. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923)." 
}, "keystone_kapers": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=261", @@ -281,8 +281,8 @@ }, "krull": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=267", - "env_description": "Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast.The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.", - "reward_description": "### Rewards\n\nYou will receive various scores for each monster you kill.\nYou can play the game until you have lost all your lives.\nFor a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267).\n" + "env_description": "Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast. The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.", + "reward_description": "## Rewards\nYou will receive various scores for each monster you kill. You can play the game until you have lost all your lives. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267)." }, "kung_fu_master": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=268", @@ -291,7 +291,7 @@ }, "laser_gates": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=271", - "env_description": "The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and deestroy the four Failsafe Detonators.", + "env_description": "The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and destroy the four Failsafe Detonators.", "reward_description": "" }, "lost_luggage": { @@ -347,7 +347,7 @@ "pitfall": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=360", "env_description": "You control Pitfall Harry and are tasked with collecting all the treasures in a jungle within 20 minutes. You have three lives. The game is over if you collect all the treasures or if you die or if the time runs out.", - "reward_description": "### Rewards\n\nYou get score points for collecting treasure, you lose points through some misfortunes like falling down a hole.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360).\n" + "reward_description": "## Rewards\nYou get score points for collecting treasure, you lose points through some misfortunes like falling down a hole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360)." }, "pitfall2": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=359", @@ -357,42 +357,42 @@ "pong": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=587", "env_description": "You control the right paddle, you compete against the left paddle controlled by the computer. You each try to keep deflecting the ball away from your goal and into your opponent's goal.", - "reward_description": "### Rewards\n\nYou get score points for getting the ball to pass the opponent's paddle. 
You lose points if the ball passes your paddle.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587).\n" + "reward_description": "## Rewards\nYou get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587)." }, "pooyan": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=372", "env_description": "You are a mother pig protecting her piglets (Pooyans) from wolves. In the first scene, you can move up and down a rope. Try to shoot the worker's balloons, while guarding yourself from attacks. If the wolves reach the ground safely they will get behind and try to eat you. In the second scene, the wolves try to float up. You have to try and stop them using arrows and bait. You die if a wolf eats you, or a stone or rock hits you.", - "reward_description": "### Rewards\n\nIf you hit a balloon, wolf or stone with an arrow you score points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372).\n" + "reward_description": "## Rewards\nIf you hit a balloon, wolf or stone with an arrow you score points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372)." }, "private_eye": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=376", "env_description": "You control the French Private Eye Pierre Touche. Navigate the city streets, parks, secret passages, dead-ends and one-ways in search of the ringleader, Henri Le Fiend and his gang. You also need to find evidence and stolen goods that are scattered about. There are five cases, complete each case before its statute of limitations expires.", - "reward_description": "### Rewards\n\nYou score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376).\n" + "reward_description": "## Rewards\nYou score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376)." }, "qbert": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareID=1224", "env_description": "You are Q*bert. Your goal is to change the color of all the cubes on the pyramid to the pyramid's 'destination' color. To do this, you must hop on each cube on the pyramid one at a time while avoiding nasty creatures that lurk there.", - "reward_description": "### Rewards\n\nYou score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL).\n" + "reward_description": "## Rewards\nYou score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. 
For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL)." }, "riverraid": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=409", - "env_description": "You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards).You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots).The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.", - "reward_description": "### Rewards\n\nScore points are your only reward. You get score points each time you destroy an enemy object:\n\n| Enemy Object | Score Points |\n|--------------|--------------|\n| Tanker | 30 |\n| Helicopter | 60 |\n| Fuel Depot | 80 |\n| Jet | 100 |\n| Bridge | 500 |\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409).\n" + "env_description": "You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards). You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low. You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots). The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.", + "reward_description": "## Rewards\nScore points are your only reward. You get score points each time you destroy an enemy object:\n\n| Enemy Object | Score Points |\n|--------------|--------------|\n| Tanker | 30 |\n| Helicopter | 60 |\n| Fuel Depot | 80 |\n| Jet | 100 |\n| Bridge | 500 |\n\nFor a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409)." }, "road_runner": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=412", - "env_description": "You control the Road Runner(TM) in a race; you can control the direction to run in and times to jumps.The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert.The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff,or you get hit by a falling rock.You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.", - "reward_description": "### Rewards\n\nScore points are your only reward. You get score points each time you:\n\n| actions | points |\n|-------------------------------------------------------|--------|\n| eat a pile of birdseed | 100 |\n| eat steel shot | 100 |\n| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |\n| get the coyote hit by a truck | 1000 |\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).\n" + "env_description": "You control the Road Runner(TM) in a race; you can control the direction to run in and when to jump. The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert. 
The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff, or you get hit by a falling rock. You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote.", + "reward_description": "## Rewards\nScore points are your only reward. You get score points each time you:\n\n| actions | points |\n|-------------------------------------------------------|--------|\n| eat a pile of birdseed | 100 |\n| eat steel shot | 100 |\n| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |\n| get the coyote hit by a truck | 1000 |\n\nFor a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412)." }, "robotank": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=416", - "env_description": "You control your Robot Tanks to destroy enemies and avoid enemy fire.Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed.The game begins with one active Robot Tank and three reserves.Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares).You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.", - "reward_description": "### Rewards\n\nThe number of enemies destroyed is the only reward.\n\nA small tank appears at the top of your screen for each enemy\n you destroy. A square with the number 12 appears each time a squadron of twelve enemies are\n destroyed.\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416).\n" + "env_description": "You control your Robot Tanks to destroy enemies and avoid enemy fire. The game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed. The game begins with one active Robot Tank and three reserves. Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares). You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.", + "reward_description": "## Rewards\nThe number of enemies destroyed is the only reward. A small tank appears at the top of your screen for each enemy you destroy. A square with the number 12 appears each time a squadron of twelve enemies is destroyed. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416)." }, "seaquest": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=424", - "env_description": "You control a sub able to move in all directions and fire torpedoes.The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly.The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to yourbase. 
You can only have six reserve subs on the screen at one time.Your sub will explode if it collides with anything except your own divers.The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well.", - "reward_description": "### Rewards\n\nScore points are your only reward.\n\nBlasting enemy sub and killer shark is worth\n20 points. Every time you surface with six divers, the value of enemy subs\nand killer sharks increases by 10, up to a maximum of 90 points each.\n\nRescued divers start at 50 points each. Then, their point value increases by 50, every\ntime you surface, up to a maximum of 1000 points each.\n\nYou'll be further rewarded with bonus points for all the oxygen you have remaining the\nmoment you surface. The more oxygen you have left, the more bonus points\nyou're given.\n\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424).\n" + "env_description": "You control a sub able to move in all directions and fire torpedoes. The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly. The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to your base. You can only have six reserve subs on the screen at one time. Your sub will explode if it collides with anything except your own divers. The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface with less than six divers, you lose one diver as well.", + "reward_description": "## Rewards\nScore points are your only reward. Blasting an enemy sub or killer shark is worth 20 points. Every time you surface with six divers, the value of enemy subs and killer sharks increases by 10, up to a maximum of 90 points each. Rescued divers start at 50 points each. Then, their point value increases by 50 every time you surface, up to a maximum of 1000 points each. You'll be further rewarded with bonus points for all the oxygen you have remaining the moment you surface. The more oxygen you have left, the more bonus points you're given. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424)." }, "sir_lancelot": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=431", @@ -401,18 +401,18 @@ }, "skiing": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=434", - "env_description": "You control a skier who can move sideways.The goal is to run through all gates (between the poles) in the fastest time.You are penalized five seconds for each gate you miss.If you hit a gate or a tree, your skier will jump back up and keep going.", - "reward_description": "### Rewards\n\nSeconds are your only rewards - negative rewards and penalties (e.g. 
missing a gate) are assigned as additional seconds.\n\nFor a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434).\n" + "env_description": "You control a skier who can move sideways. The goal is to run through all gates (between the poles) in the fastest time. You are penalized five seconds for each gate you miss. If you hit a gate or a tree, your skier will jump back up and keep going.", + "reward_description": "## Rewards\nSeconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434), in particular the Slalom Racing section." }, "solaris": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=450", "env_description": "You control a spaceship. Blast enemies before they can blast you. You can warp to different sectors. You have to defend Federation planets, and destroy Zylon forces. Keep track of your fuel, if you run out you lose a life. Warp to a Federation planet to refuel. The game ends if all your ships are destroyed or if you reach the Solaris planet.", - "reward_description": "### Rewards\n\nYou gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).\n" + "reward_description": "## Rewards\nYou gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450)." }, "space_invaders": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=460", "env_description": "Your objective is to destroy the space invaders by shooting your laser cannon at them before they reach the Earth. The game ends when all your lives are lost after taking enemy fire, or when they reach the earth.", - "reward_description": "### Rewards\n\nYou gain points for destroying space invaders. The invaders in the back rows are worth more points.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460).\n" + "reward_description": "## Rewards\nYou gain points for destroying space invaders. The invaders in the back rows are worth more points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460)." }, "space_war": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=470", @@ -422,7 +422,7 @@ "star_gunner": { "atariage_url": "http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html", "env_description": "Stop the alien invasion by shooting down alien saucers and creatures while avoiding bombs.", - "reward_description": "### Rewards\n\nYou score points for destroying enemies. You get bonus points for clearing a wave and a level.\nFor a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html).\n" + "reward_description": "## Rewards\nYou score points for destroying enemies. You get bonus points for clearing a wave and a level. 
For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html)." }, "superman": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=533", @@ -436,8 +436,8 @@ }, "tennis": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=555", - "env_description": "You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis.The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.", - "reward_description": "### Rewards\n\nThe scoring is as per the sport of tennis, played till one set.\nFor a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555).\n" + "env_description": "You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis. The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match.", + "reward_description": "## Rewards\nThe scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555)." }, "tetris": { "atariage_url": "", @@ -446,13 +446,13 @@ }, "tic_tac_toe_3d": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=521", - "env_description": "Players take turns placing their mark (an X or an O) on a 3-dimensional, 4 x 4 x 4 grid in an attempt to get 4 in a row before their opponent does.", + "env_description": "Players take turns placing their mark (an X or an O) on a 3-dimensional, 4x4x4 grid in an attempt to get 4 in a row before their opponent does.", "reward_description": "" }, "time_pilot": { "atariage_url": "http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html", "env_description": "You control an aircraft. Use it to destroy your enemies. As you progress in the game, you encounter enemies with technology that is increasingly from the future.", - "reward_description": "### Rewards\n\nYou score points for destroying enemies, gaining more points for difficult enemies.\nFor a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html).\n" + "reward_description": "## Rewards\nYou score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html)." 
}, "trondead": { "atariage_url": "https://atariage.com/manual_html_page.php?SoftwareLabelID=569", diff --git a/docs/_scripts/gen_atari_table.py b/docs/_scripts/gen_atari_table.py index e7ff9d6ea..1cd6f2841 100644 --- a/docs/_scripts/gen_atari_table.py +++ b/docs/_scripts/gen_atari_table.py @@ -9,6 +9,11 @@ import gymnasium +# Necessary for v1.0.0 without ale-py gymnasium support +# from shimmy import registration +# registration._register_atari_envs() + + # # Generate the list of all atari games on atari.md for rom_id in sorted(ALL_ATARI_GAMES): print(f"atari/{rom_id}") @@ -51,7 +56,7 @@ def shortened_repr(values): for rom_id in tqdm(ALL_ATARI_GAMES): env_name = rom_utils.rom_id_to_name(rom_id) - env = gymnasium.make(f"ALE/{env_name}-v5") + env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped available_difficulties = env.ale.getAvailableDifficulties() default_difficulty = env.ale.cloneState().getDifficulty() @@ -83,7 +88,7 @@ def shortened_repr(values): for rom_id in tqdm(ALL_ATARI_GAMES): env_name = rom_utils.rom_id_to_name(rom_id) - env = gymnasium.make(f"ALE/{env_name}-v5") + env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped if rom_id in atari_data: env_data = atari_data[rom_id] @@ -211,6 +216,7 @@ def shortened_repr(values): See variants section for the type of observation used by each environment id by default. {reward_description} + ## Variants {env_name} has the following variants of the environment id which have the following differences in observation, @@ -230,7 +236,7 @@ def shortened_repr(values): A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. -* v5: Stickiness was added back and stochastic frameskipping was removed. The environments are now in the "ALE" namespace. +* v5: Stickiness was added back and stochastic frame-skipping was removed. The environments are now in the "ALE" namespace. 
* v4: Stickiness of actions was removed * v0: Initial versions release """ diff --git a/docs/_scripts/gen_gifs.py b/docs/_scripts/gen_gifs.py index 0efac644e..ec9aee361 100644 --- a/docs/_scripts/gen_gifs.py +++ b/docs/_scripts/gen_gifs.py @@ -17,9 +17,6 @@ "FrozenLake8x8", "LunarLanderContinuous", "BipedalWalkerHardcore", - "CartPoleJax", - "PendulumJax", - "Jax-Blackjack", ] for env_spec in gym.registry.values(): if env_spec.name in exclude_env_names: diff --git a/docs/_static/videos/box2d/bipedal_walker.gif b/docs/_static/videos/box2d/bipedal_walker.gif index a8f8192c2..418d6f958 100644 Binary files a/docs/_static/videos/box2d/bipedal_walker.gif and b/docs/_static/videos/box2d/bipedal_walker.gif differ diff --git a/docs/_static/videos/box2d/car_racing.gif b/docs/_static/videos/box2d/car_racing.gif index 4f12c736c..88e67d795 100644 Binary files a/docs/_static/videos/box2d/car_racing.gif and b/docs/_static/videos/box2d/car_racing.gif differ diff --git a/docs/_static/videos/box2d/lunar_lander.gif b/docs/_static/videos/box2d/lunar_lander.gif index 700acf65a..94a9060ba 100644 Binary files a/docs/_static/videos/box2d/lunar_lander.gif and b/docs/_static/videos/box2d/lunar_lander.gif differ diff --git a/docs/_static/videos/classic_control/acrobot.gif b/docs/_static/videos/classic_control/acrobot.gif index 3f367f4ea..6241b7b54 100644 Binary files a/docs/_static/videos/classic_control/acrobot.gif and b/docs/_static/videos/classic_control/acrobot.gif differ diff --git a/docs/_static/videos/classic_control/cart_pole.gif b/docs/_static/videos/classic_control/cart_pole.gif index 96365f6a7..10d495dfb 100644 Binary files a/docs/_static/videos/classic_control/cart_pole.gif and b/docs/_static/videos/classic_control/cart_pole.gif differ diff --git a/docs/_static/videos/classic_control/mountain_car.gif b/docs/_static/videos/classic_control/mountain_car.gif index 5eeb6e8af..33dbe9838 100644 Binary files a/docs/_static/videos/classic_control/mountain_car.gif and b/docs/_static/videos/classic_control/mountain_car.gif differ diff --git a/docs/_static/videos/classic_control/mountain_car_continuous.gif b/docs/_static/videos/classic_control/mountain_car_continuous.gif index 846d8f460..91b564402 100644 Binary files a/docs/_static/videos/classic_control/mountain_car_continuous.gif and b/docs/_static/videos/classic_control/mountain_car_continuous.gif differ diff --git a/docs/_static/videos/classic_control/pendulum.gif b/docs/_static/videos/classic_control/pendulum.gif index c6f22d183..09c4eae0d 100644 Binary files a/docs/_static/videos/classic_control/pendulum.gif and b/docs/_static/videos/classic_control/pendulum.gif differ diff --git a/docs/_static/videos/mujoco/ant.gif b/docs/_static/videos/mujoco/ant.gif index 24d5eacea..d9892785a 100644 Binary files a/docs/_static/videos/mujoco/ant.gif and b/docs/_static/videos/mujoco/ant.gif differ diff --git a/docs/_static/videos/mujoco/half_cheetah.gif b/docs/_static/videos/mujoco/half_cheetah.gif index a1fcdf314..2db6ca568 100644 Binary files a/docs/_static/videos/mujoco/half_cheetah.gif and b/docs/_static/videos/mujoco/half_cheetah.gif differ diff --git a/docs/_static/videos/mujoco/hopper.gif b/docs/_static/videos/mujoco/hopper.gif index 181205ac0..9b50e7d0b 100644 Binary files a/docs/_static/videos/mujoco/hopper.gif and b/docs/_static/videos/mujoco/hopper.gif differ diff --git a/docs/_static/videos/mujoco/humanoid.gif b/docs/_static/videos/mujoco/humanoid.gif index 949de7319..93e5fa951 100644 Binary files a/docs/_static/videos/mujoco/humanoid.gif and 
b/docs/_static/videos/mujoco/humanoid.gif differ diff --git a/docs/_static/videos/mujoco/humanoid_standup.gif b/docs/_static/videos/mujoco/humanoid_standup.gif index bb118ad07..40ac612f6 100644 Binary files a/docs/_static/videos/mujoco/humanoid_standup.gif and b/docs/_static/videos/mujoco/humanoid_standup.gif differ diff --git a/docs/_static/videos/mujoco/inverted_double_pendulum.gif b/docs/_static/videos/mujoco/inverted_double_pendulum.gif index 5fbc35153..82a07f2e6 100644 Binary files a/docs/_static/videos/mujoco/inverted_double_pendulum.gif and b/docs/_static/videos/mujoco/inverted_double_pendulum.gif differ diff --git a/docs/_static/videos/mujoco/inverted_pendulum.gif b/docs/_static/videos/mujoco/inverted_pendulum.gif index 62d9abb18..b5792b5fe 100644 Binary files a/docs/_static/videos/mujoco/inverted_pendulum.gif and b/docs/_static/videos/mujoco/inverted_pendulum.gif differ diff --git a/docs/_static/videos/mujoco/pusher.gif b/docs/_static/videos/mujoco/pusher.gif index a1dca8dcb..c9058cb30 100644 Binary files a/docs/_static/videos/mujoco/pusher.gif and b/docs/_static/videos/mujoco/pusher.gif differ diff --git a/docs/_static/videos/mujoco/reacher.gif b/docs/_static/videos/mujoco/reacher.gif index d6d5fa4fa..eaed0127f 100644 Binary files a/docs/_static/videos/mujoco/reacher.gif and b/docs/_static/videos/mujoco/reacher.gif differ diff --git a/docs/_static/videos/mujoco/swimmer.gif b/docs/_static/videos/mujoco/swimmer.gif index db679e167..27a6089bb 100644 Binary files a/docs/_static/videos/mujoco/swimmer.gif and b/docs/_static/videos/mujoco/swimmer.gif differ diff --git a/docs/_static/videos/mujoco/walker2d.gif b/docs/_static/videos/mujoco/walker2d.gif index d6d256c36..81137f178 100644 Binary files a/docs/_static/videos/mujoco/walker2d.gif and b/docs/_static/videos/mujoco/walker2d.gif differ diff --git a/docs/_static/videos/toy_text/blackjack.gif b/docs/_static/videos/toy_text/blackjack.gif index c88b5fb07..5c5ab2058 100644 Binary files a/docs/_static/videos/toy_text/blackjack.gif and b/docs/_static/videos/toy_text/blackjack.gif differ diff --git a/docs/_static/videos/toy_text/cliff_walking.gif b/docs/_static/videos/toy_text/cliff_walking.gif index 41978369d..7aef17a2d 100644 Binary files a/docs/_static/videos/toy_text/cliff_walking.gif and b/docs/_static/videos/toy_text/cliff_walking.gif differ diff --git a/docs/_static/videos/toy_text/frozen_lake.gif b/docs/_static/videos/toy_text/frozen_lake.gif index 9af9274de..302b41476 100644 Binary files a/docs/_static/videos/toy_text/frozen_lake.gif and b/docs/_static/videos/toy_text/frozen_lake.gif differ diff --git a/docs/_static/videos/toy_text/taxi.gif b/docs/_static/videos/toy_text/taxi.gif index aecde4a2a..077094c98 100644 Binary files a/docs/_static/videos/toy_text/taxi.gif and b/docs/_static/videos/toy_text/taxi.gif differ diff --git a/docs/api/env.md b/docs/api/env.md index af3345b7e..fd5125a3b 100644 --- a/docs/api/env.md +++ b/docs/api/env.md @@ -54,6 +54,7 @@ title: Env .. autoproperty:: gymnasium.Env.unwrapped .. autoproperty:: gymnasium.Env.np_random +.. autoproperty:: gymnasium.Env.np_random_seed ``` ## Implementing environments @@ -61,7 +62,7 @@ title: Env ```{eval-rst} .. py:currentmodule:: gymnasium -When implementing an environment, the :meth:`Env.reset` and :meth:`Env.step` functions much be created describing the dynamics of the environment. For more information see the environment creation tutorial. 
+When implementing an environment, the :meth:`Env.reset` and :meth:`Env.step` functions must be created to describe the dynamics of the environment. For more information, see the environment creation tutorial. ``` ## Creating environments diff --git a/docs/api/spaces.md b/docs/api/spaces.md index 791cf43ac..9f749a66a 100644 --- a/docs/api/spaces.md +++ b/docs/api/spaces.md @@ -9,7 +9,6 @@ title: Spaces spaces/fundamental spaces/composite spaces/utils -vector/utils ``` ```{eval-rst} diff --git a/docs/api/vector.md b/docs/api/vector.md index 0d1887397..c1e3e7b86 100644 --- a/docs/api/vector.md +++ b/docs/api/vector.md @@ -67,6 +67,7 @@ vector/utils ```{eval-rst} .. autoproperty:: gymnasium.vector.VectorEnv.unwrapped .. autoproperty:: gymnasium.vector.VectorEnv.np_random +.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed ``` ## Making Vector Environments diff --git a/docs/api/vector/async_vector_env.md b/docs/api/vector/async_vector_env.md index a0368419e..14fdf5c50 100644 --- a/docs/api/vector/async_vector_env.md +++ b/docs/api/vector/async_vector_env.md @@ -11,3 +11,10 @@ .. automethod:: gymnasium.vector.AsyncVectorEnv.get_attr .. automethod:: gymnasium.vector.AsyncVectorEnv.set_attr ``` + +### Additional Methods + +```{eval-rst} +.. autoproperty:: gymnasium.vector.VectorEnv.np_random +.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed +``` diff --git a/docs/api/vector/sync_vector_env.md b/docs/api/vector/sync_vector_env.md index 3855e4820..1295f59f9 100644 --- a/docs/api/vector/sync_vector_env.md +++ b/docs/api/vector/sync_vector_env.md @@ -11,3 +11,10 @@ .. automethod:: gymnasium.vector.SyncVectorEnv.get_attr .. automethod:: gymnasium.vector.SyncVectorEnv.set_attr ``` + +### Additional Methods + +```{eval-rst} +.. autoproperty:: gymnasium.vector.VectorEnv.np_random +.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed +``` diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md index 000faf061..9a3d1e50b 100644 --- a/docs/api/wrappers.md +++ b/docs/api/wrappers.md @@ -12,7 +12,6 @@ wrappers/misc_wrappers wrappers/action_wrappers wrappers/observation_wrappers wrappers/reward_wrappers -wrappers/vector_wrappers ``` ```{eval-rst} @@ -48,5 +47,6 @@ wrappers/vector_wrappers .. autoproperty:: gymnasium.Wrapper.spec .. autoproperty:: gymnasium.Wrapper.metadata .. autoproperty:: gymnasium.Wrapper.np_random +.. autoproperty:: gymnasium.Wrapper.np_random_seed .. autoproperty:: gymnasium.Wrapper.unwrapped ``` diff --git a/docs/api/wrappers/observation_wrappers.md b/docs/api/wrappers/observation_wrappers.md index 10284aca0..631d53203 100644 --- a/docs/api/wrappers/observation_wrappers.md +++ b/docs/api/wrappers/observation_wrappers.md @@ -18,7 +18,7 @@ .. autoclass:: gymnasium.wrappers.GrayscaleObservation .. autoclass:: gymnasium.wrappers.MaxAndSkipObservation .. autoclass:: gymnasium.wrappers.NormalizeObservation -.. autoclass:: gymnasium.wrappers.RenderObservation +.. autoclass:: gymnasium.wrappers.AddRenderObservation .. autoclass:: gymnasium.wrappers.ResizeObservation .. autoclass:: gymnasium.wrappers.ReshapeObservation .. autoclass:: gymnasium.wrappers.RescaleObservation diff --git a/docs/api/wrappers/table.md b/docs/api/wrappers/table.md index 240b9f1f9..be540339f 100644 --- a/docs/api/wrappers/table.md +++ b/docs/api/wrappers/table.md @@ -56,7 +56,7 @@ wrapper in the page on the wrapper type - Records videos of environment episodes using the environment's render function. 
* - :class:`RenderCollection` - Collect rendered frames of an environment such ``render`` returns a ``list[RenderedFrame]``. - * - :class:`RenderObservation` + * - :class:`AddRenderObservation` - Includes the rendered observations in the environment's observations. * - :class:`RescaleAction` - Affinely (linearly) rescales a ``Box`` action space of the environment to within the range of ``[min_action, max_action]``. diff --git a/docs/environments/atari/adventure.md b/docs/environments/atari/adventure.md index a06d0fcc4..b4326a0fe 100644 --- a/docs/environments/atari/adventure.md +++ b/docs/environments/atari/adventure.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Adventure has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/air_raid.md b/docs/environments/atari/air_raid.md index 0cb79cfd5..498c6b962 100644 --- a/docs/environments/atari/air_raid.md +++ b/docs/environments/atari/air_raid.md @@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants AirRaid has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/alien.md b/docs/environments/atari/alien.md index 81c57b90d..b672dfe29 100644 --- a/docs/environments/atari/alien.md +++ b/docs/environments/atari/alien.md @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught -by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a -table of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815). +## Rewards +You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a table of scores corresponding to the different achievements, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815). ## Variants diff --git a/docs/environments/atari/amidar.md b/docs/environments/atari/amidar.md index 3ef9357ff..a6624aaae 100644 --- a/docs/environments/atari/amidar.md +++ b/docs/environments/atari/amidar.md @@ -48,10 +48,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817). +## Rewards +You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817). 
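The `RenderObservation` to `AddRenderObservation` rename above touches both the wrapper listing and its table entry. A hedged usage sketch, assuming the wrapper's default arguments and that the environment is created with `render_mode="rgb_array"` so there is a frame to expose:

```python
# Sketch: AddRenderObservation puts the rendered frame into the observation.
import gymnasium
from gymnasium.wrappers import AddRenderObservation

env = gymnasium.make("CartPole-v1", render_mode="rgb_array")
env = AddRenderObservation(env)  # with the defaults, the observation becomes the rendered frame

obs, info = env.reset(seed=0)
print(obs.shape)  # an RGB array, e.g. (400, 600, 3) for CartPole
```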
## Variants diff --git a/docs/environments/atari/assault.md b/docs/environments/atari/assault.md index b57e658ac..810c22bf5 100644 --- a/docs/environments/atari/assault.md +++ b/docs/environments/atari/assault.md @@ -21,7 +21,7 @@ For more Assault variants with different observation and action spaces, see the ## Description -You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones.You must destroy these enemies and dodge their attacks. +You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones. You must destroy these enemies and dodge their attacks. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=827) @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Assault has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/asterix.md b/docs/environments/atari/asterix.md index 21599d014..66232ae91 100644 --- a/docs/environments/atari/asterix.md +++ b/docs/environments/atari/asterix.md @@ -47,9 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -A table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325). +## Rewards +A table of scores awarded for collecting the different objects is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325). ## Variants diff --git a/docs/environments/atari/asteroids.md b/docs/environments/atari/asteroids.md index a94cbd8bf..a1be05d51 100644 --- a/docs/environments/atari/asteroids.md +++ b/docs/environments/atari/asteroids.md @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score -for destroying it. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL). +## Rewards +You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score for destroying it. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL). ## Variants diff --git a/docs/environments/atari/atlantis.md b/docs/environments/atari/atlantis.md index 4348bd59b..e9e19f952 100644 --- a/docs/environments/atari/atlantis.md +++ b/docs/environments/atari/atlantis.md @@ -21,7 +21,7 @@ For more Atlantis variants with different observation and action spaces, see the ## Description -Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts.You lose if your enemies manage to destroy all seven of Atlantis' installations. 
You may rebuild installations after you have fought of a wave of enemies and scored a sufficient number of points. +Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts. You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought off a wave of enemies and scored a sufficient number of points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835) @@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying enemies, keeping installations protected during attack waves. You score more points -if you manage to destroy your enemies with one of the outer defense posts. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835). +## Rewards +You score points for destroying enemies and keeping installations protected during attack waves. You score more points if you manage to destroy your enemies with one of the outer defense posts. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835). ## Variants diff --git a/docs/environments/atari/atlantis2.md b/docs/environments/atari/atlantis2.md index 30b09f7ef..ad32dafd7 100644 --- a/docs/environments/atari/atlantis2.md +++ b/docs/environments/atari/atlantis2.md @@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Atlantis2 has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/backgammon.md b/docs/environments/atari/backgammon.md index 5d00fa245..24f2235e2 100644 --- a/docs/environments/atari/backgammon.md +++ b/docs/environments/atari/backgammon.md @@ -46,6 +46,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Backgammon has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/bank_heist.md b/docs/environments/atari/bank_heist.md index 7c199f964..a07da8ea6 100644 --- a/docs/environments/atari/bank_heist.md +++ b/docs/environments/atari/bank_heist.md @@ -21,7 +21,7 @@ For more BankHeist variants with different observation and action spaces, see th ## Description -You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city.At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police,or run over the dynamite you have previously dropped. +You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities.
The police chase you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city. At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police, or run over the dynamite you have previously dropped. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, -you will score extra points. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008). +## Rewards +You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, you will score extra points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008). ## Variants diff --git a/docs/environments/atari/basic_math.md b/docs/environments/atari/basic_math.md index e9a454b1c..acefc3860 100644 --- a/docs/environments/atari/basic_math.md +++ b/docs/environments/atari/basic_math.md @@ -21,8 +21,7 @@ For more BasicMath variants with different observation and action spaces, see th ## Description -You must solve basic math problems using a joystick -to scroll to the correct numeric answer. +You must solve basic math problems using a joystick to scroll to the correct numeric answer. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=14) @@ -48,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants BasicMath has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/battle_zone.md b/docs/environments/atari/battle_zone.md index 0b096805f..5e6634f50 100644 --- a/docs/environments/atari/battle_zone.md +++ b/docs/environments/atari/battle_zone.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You receive points for destroying enemies. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL). +## Rewards +You receive points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL). ## Variants diff --git a/docs/environments/atari/beam_rider.md b/docs/environments/atari/beam_rider.md index 6d4a44654..a920b34fc 100644 --- a/docs/environments/atari/beam_rider.md +++ b/docs/environments/atari/beam_rider.md @@ -47,10 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying enemies.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL). +## Rewards +You score points for destroying enemies. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL). ## Variants diff --git a/docs/environments/atari/berzerk.md b/docs/environments/atari/berzerk.md index 053c70f13..afdc8e368 100644 --- a/docs/environments/atari/berzerk.md +++ b/docs/environments/atari/berzerk.md @@ -21,7 +21,7 @@ For more Berzerk variants with different observation and action spaces, see the ## Description -You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode.You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode. +You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode. You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=866) @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying robots. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL). +## Rewards +You score points for destroying robots. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL). ## Variants diff --git a/docs/environments/atari/blackjack.md b/docs/environments/atari/blackjack.md index 70dff74dc..e4a59b3b3 100644 --- a/docs/environments/atari/blackjack.md +++ b/docs/environments/atari/blackjack.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Blackjack has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/bowling.md b/docs/environments/atari/bowling.md index 32982c8b5..ccdaaee56 100644 --- a/docs/environments/atari/bowling.md +++ b/docs/environments/atari/bowling.md @@ -46,12 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You receive points for knocking down pins. The exact score depends on whether you manage a "strike", "spare" or "open" -frame. Moreover, the points you score for one frame may depend on following frames. -You can score up to 300 points in one game (if you manage to do 12 strikes). -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879). +## Rewards +You receive points for knocking down pins. 
The exact score depends on whether you manage a "strike", "spare" or "open" frame. Moreover, the points you score for one frame may depend on following frames. You can score up to 300 points in one game (if you manage to do 12 strikes). For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879). ## Variants diff --git a/docs/environments/atari/boxing.md b/docs/environments/atari/boxing.md index f0d2620bc..e6533a187 100644 --- a/docs/environments/atari/boxing.md +++ b/docs/environments/atari/boxing.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by landing punches. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882). +## Rewards +You score points by landing punches. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882). ## Variants diff --git a/docs/environments/atari/breakout.md b/docs/environments/atari/breakout.md index 42b0ba1e5..7fc8a9585 100644 --- a/docs/environments/atari/breakout.md +++ b/docs/environments/atari/breakout.md @@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889). +## Rewards +You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889). ## Variants diff --git a/docs/environments/atari/carnival.md b/docs/environments/atari/carnival.md index eb9112874..c897d6611 100644 --- a/docs/environments/atari/carnival.md +++ b/docs/environments/atari/carnival.md @@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. -You will score extra points if it shows a plus sign! -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908). +## Rewards +You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. You will score extra points if it shows a plus sign! For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908). ## Variants diff --git a/docs/environments/atari/casino.md b/docs/environments/atari/casino.md index 8460c0ffc..a88002de8 100644 --- a/docs/environments/atari/casino.md +++ b/docs/environments/atari/casino.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants Casino has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/centipede.md b/docs/environments/atari/centipede.md index 6e30d39f5..5d9e09fe8 100644 --- a/docs/environments/atari/centipede.md +++ b/docs/environments/atari/centipede.md @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round -(i.e. after you have lost a wand) for mushrooms that were not destroyed. -Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911). +## Rewards +You score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round (i.e. after you have lost a wand) for mushrooms that were not destroyed. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911). ## Variants diff --git a/docs/environments/atari/chopper_command.md b/docs/environments/atari/chopper_command.md index 1a747bd75..a51883698 100644 --- a/docs/environments/atari/chopper_command.md +++ b/docs/environments/atari/chopper_command.md @@ -21,7 +21,7 @@ For more ChopperCommand variants with different observation and action spaces, s ## Description -You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft.A mini-map is displayed at the bottom of the screen. +You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft. A mini-map is displayed at the bottom of the screen. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number -of trucks that have survived. -Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921). +## Rewards +You score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number of trucks that have survived. Detailed documentation can be found on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921). ## Variants diff --git a/docs/environments/atari/crazy_climber.md b/docs/environments/atari/crazy_climber.md index 15469c4f6..9d6c0f2c3 100644 --- a/docs/environments/atari/crazy_climber.md +++ b/docs/environments/atari/crazy_climber.md @@ -47,9 +47,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -A table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113). +## Rewards +A table of scores awarded for completing each row of a building is provided on the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113). 
## Variants diff --git a/docs/environments/atari/crossbow.md b/docs/environments/atari/crossbow.md index bfd28a6c0..9aece246d 100644 --- a/docs/environments/atari/crossbow.md +++ b/docs/environments/atari/crossbow.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Crossbow has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/darkchambers.md b/docs/environments/atari/darkchambers.md index 0912da58d..4fc9d1049 100644 --- a/docs/environments/atari/darkchambers.md +++ b/docs/environments/atari/darkchambers.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Darkchambers has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/defender.md b/docs/environments/atari/defender.md index 2478a76d8..10acc2d17 100644 --- a/docs/environments/atari/defender.md +++ b/docs/environments/atari/defender.md @@ -21,7 +21,7 @@ For more Defender variants with different observation and action spaces, see the ## Description -Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids.You have three lives and three smart bombs. You lose a live when you are shot down by an alien spaceship.Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles. +Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids. You have three lives and three smart bombs. You lose a life when you are shot down by an alien spaceship. Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of laser missiles. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128) @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You receive points for destroying enemies, rescuing abducted humans and keeping humans alive. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128). +## Rewards +You receive points for destroying enemies, rescuing abducted humans and keeping humans alive. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128). ## Variants diff --git a/docs/environments/atari/demon_attack.md b/docs/environments/atari/demon_attack.md index 81a72a753..24e833b28 100644 --- a/docs/environments/atari/demon_attack.md +++ b/docs/environments/atari/demon_attack.md @@ -46,11 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -Each enemy you slay gives you points. The amount of points depends on the type of demon and which -wave you are in.
A detailed table of scores is provided on [the AtariAge -page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135). +## Rewards +Each enemy you slay gives you points. The number of points depends on the type of demon and which wave you are in. A detailed table of scores is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135). ## Variants diff --git a/docs/environments/atari/donkey_kong.md b/docs/environments/atari/donkey_kong.md index 7af67c0ea..64e5bf599 100644 --- a/docs/environments/atari/donkey_kong.md +++ b/docs/environments/atari/donkey_kong.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants DonkeyKong has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/double_dunk.md b/docs/environments/atari/double_dunk.md index 39f419a3e..6cbfbce57 100644 --- a/docs/environments/atari/double_dunk.md +++ b/docs/environments/atari/double_dunk.md @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -Scores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending -from where you shoot. After a defensive foul, a successful shot from the foul line gives you 1 -point. +## Rewards +Scores follow the rules of basketball. You can get either 3 points or 2 points depending on where you shoot from. After a defensive foul, a successful shot from the foul line gives you 1 point. ## Variants diff --git a/docs/environments/atari/earthworld.md b/docs/environments/atari/earthworld.md index 04143cf2c..c0ef39d2a 100644 --- a/docs/environments/atari/earthworld.md +++ b/docs/environments/atari/earthworld.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Earthworld has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/elevator_action.md b/docs/environments/atari/elevator_action.md index cf8ef1d15..e700fe099 100644 --- a/docs/environments/atari/elevator_action.md +++ b/docs/environments/atari/elevator_action.md @@ -21,7 +21,7 @@ For more ElevatorAction variants with different observation and action spaces, s ## Description -You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you in each floor. You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents.This is an unreleased prototype based on the arcade game. +You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting for you on each floor.
You gather points by shooting down enemy agents and visiting apartments marked with a red door, which contain the secret documents. This is an unreleased prototype based on the arcade game. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1131) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each -secret document collected (visiting a red door). Each time you get shot you lose one life and the -game ends when losing all lives. +## Rewards +You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each secret document collected (visiting a red door). Each time you get shot, you lose one life, and the game ends when all lives are lost. ## Variants diff --git a/docs/environments/atari/enduro.md b/docs/environments/atari/enduro.md index bf8b7eeb3..a1865cf40 100644 --- a/docs/environments/atari/enduro.md +++ b/docs/environments/atari/enduro.md @@ -47,8 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - +## Rewards You get 1 point for each vehicle you overtake. ## Variants diff --git a/docs/environments/atari/entombed.md b/docs/environments/atari/entombed.md index 7e2583ef9..fcd4c7e46 100644 --- a/docs/environments/atari/entombed.md +++ b/docs/environments/atari/entombed.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Entombed has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/et.md b/docs/environments/atari/et.md index a5d6b7ca6..1e791649d 100644 --- a/docs/environments/atari/et.md +++ b/docs/environments/atari/et.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Et has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/fishing_derby.md b/docs/environments/atari/fishing_derby.md index 9997f1a89..27d360228 100644 --- a/docs/environments/atari/fishing_derby.md +++ b/docs/environments/atari/fishing_derby.md @@ -49,12 +49,10 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - +## Rewards The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182). -Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
## Variants FishingDerby has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/flag_capture.md b/docs/environments/atari/flag_capture.md index f8d6079cb..79e95187e 100644 --- a/docs/environments/atari/flag_capture.md +++ b/docs/environments/atari/flag_capture.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants FlagCapture has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/freeway.md b/docs/environments/atari/freeway.md index 7b7de6659..1a0508877 100644 --- a/docs/environments/atari/freeway.md +++ b/docs/environments/atari/freeway.md @@ -45,12 +45,10 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - +## Rewards The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=192). -Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1). ## Variants Freeway has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/frogger.md b/docs/environments/atari/frogger.md index e503fd30b..2cc25da9c 100644 --- a/docs/environments/atari/frogger.md +++ b/docs/environments/atari/frogger.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Frogger has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/frostbite.md b/docs/environments/atari/frostbite.md index 25277493b..c94230adb 100644 --- a/docs/environments/atari/frostbite.md +++ b/docs/environments/atari/frostbite.md @@ -49,12 +49,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards +## Rewards +The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199). -The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can -find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199). - -Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1). ## Variants Frostbite has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/galaxian.md b/docs/environments/atari/galaxian.md index 03a34880f..b6f2af409 100644 --- a/docs/environments/atari/galaxian.md +++ b/docs/environments/atari/galaxian.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants Galaxian has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/gopher.md b/docs/environments/atari/gopher.md index bb0aaae4d..522693175 100644 --- a/docs/environments/atari/gopher.md +++ b/docs/environments/atari/gopher.md @@ -47,12 +47,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards +## Rewards +The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218). -The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can -find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218). - -Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1). ## Variants Gopher has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/gravitar.md b/docs/environments/atari/gravitar.md index af046496f..d29a23a4e 100644 --- a/docs/environments/atari/gravitar.md +++ b/docs/environments/atari/gravitar.md @@ -49,12 +49,9 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards +## Rewards +The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223). -The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can -find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223). - -Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1). ## Variants Gravitar has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/hangman.md b/docs/environments/atari/hangman.md index 7955bad98..2d7cfdd45 100644 --- a/docs/environments/atari/hangman.md +++ b/docs/environments/atari/hangman.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Hangman has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/haunted_house.md b/docs/environments/atari/haunted_house.md index d1c766d97..1ff8c84d3 100644 --- a/docs/environments/atari/haunted_house.md +++ b/docs/environments/atari/haunted_house.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants HauntedHouse has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/hero.md b/docs/environments/atari/hero.md index 39c84aad3..04205d156 100644 --- a/docs/environments/atari/hero.md +++ b/docs/environments/atari/hero.md @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for shooting critters, rescuing miners, and dynamiting walls. -Extra points are rewarded for any power remaining after rescuing a miner. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228). +## Rewards +You score points for shooting critters, rescuing miners, and dynamiting walls. Extra points are awarded for any power remaining after rescuing a miner. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228). ## Variants diff --git a/docs/environments/atari/human_cannonball.md b/docs/environments/atari/human_cannonball.md index affd94b18..c607108c5 100644 --- a/docs/environments/atari/human_cannonball.md +++ b/docs/environments/atari/human_cannonball.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants HumanCannonball has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/ice_hockey.md b/docs/environments/atari/ice_hockey.md index a77a68788..6206560d8 100644 --- a/docs/environments/atari/ice_hockey.md +++ b/docs/environments/atari/ice_hockey.md @@ -21,7 +21,7 @@ For more IceHockey variants with different observation and action spaces, see th ## Description -Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck".There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal.Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal. +Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck". There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal. Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. -There are no limits to how many points you can get per game, other than the time limit of 3-minute games. -For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).
+## Rewards +You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. There are no limits to how many points you can get per game, other than the time limit of 3-minute games. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241). ## Variants diff --git a/docs/environments/atari/jamesbond.md b/docs/environments/atari/jamesbond.md index b67917e88..f717ae504 100644 --- a/docs/environments/atari/jamesbond.md +++ b/docs/environments/atari/jamesbond.md @@ -21,7 +21,7 @@ For more Jamesbond variants with different observation and action spaces, see th ## Description -Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions.The craft moves forward with a right motion and slightly back with a left motion.An up or down motion causes the craft to jump or dive.You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen. +Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions. The craft moves forward with a right motion and slightly back with a left motion. An up or down motion causes the craft to jump or dive. You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. -There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. -For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250). +## Rewards +The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250). ## Variants diff --git a/docs/environments/atari/journey_escape.md b/docs/environments/atari/journey_escape.md index cbcd13bc0..251cf846b 100644 --- a/docs/environments/atari/journey_escape.md +++ b/docs/environments/atari/journey_escape.md @@ -21,7 +21,7 @@ For more JourneyEscape variants with different observation and action spaces, se ## Description -You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out.You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters. +You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out. You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252) @@ -50,11 +50,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -At the start of the game, you will have $50,000 and 60 units of time. -Your end game score with be dependent on how much time you have remaining and who you encounter along the way. -For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252). +## Rewards +At the start of the game, you will have $50,000 and 60 units of time. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252). ## Variants diff --git a/docs/environments/atari/kaboom.md b/docs/environments/atari/kaboom.md index a4933394d..8c6871d89 100644 --- a/docs/environments/atari/kaboom.md +++ b/docs/environments/atari/kaboom.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Kaboom has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/kangaroo.md b/docs/environments/atari/kangaroo.md index 2db659ce0..ba8e757e6 100644 --- a/docs/environments/atari/kangaroo.md +++ b/docs/environments/atari/kangaroo.md @@ -21,7 +21,7 @@ For more Kangaroo variants with different observation and action spaces, see the ## Description -The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives.During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys. +The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives. During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -Your score will be shown at the top right corner of the game. -Your end game score with be dependent on how much time you have remaining and who you encounter along the way. -For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923). +## Rewards +Your score will be shown at the top right corner of the game. Your end game score will be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).
## Variants diff --git a/docs/environments/atari/keystone_kapers.md b/docs/environments/atari/keystone_kapers.md index 39cdc6e49..7d4f4e11d 100644 --- a/docs/environments/atari/keystone_kapers.md +++ b/docs/environments/atari/keystone_kapers.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants KeystoneKapers has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/king_kong.md b/docs/environments/atari/king_kong.md index 7e00cf31d..dcc776207 100644 --- a/docs/environments/atari/king_kong.md +++ b/docs/environments/atari/king_kong.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants KingKong has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/klax.md b/docs/environments/atari/klax.md index 62d7430cb..46af8805d 100644 --- a/docs/environments/atari/klax.md +++ b/docs/environments/atari/klax.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Klax has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/koolaid.md b/docs/environments/atari/koolaid.md index 21ef16c1d..fd07c8b1c 100644 --- a/docs/environments/atari/koolaid.md +++ b/docs/environments/atari/koolaid.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Koolaid has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/krull.md b/docs/environments/atari/krull.md index 94ebbefc9..234ce584a 100644 --- a/docs/environments/atari/krull.md +++ b/docs/environments/atari/krull.md @@ -21,7 +21,7 @@ For more Krull variants with different observation and action spaces, see the va ## Description -Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast.The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull. +Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast. The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267) @@ -49,11 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You will receive various scores for each monster you kill. -You can play the game until you have lost all your lives. -For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267). +## Rewards +You will receive various scores for each monster you kill. You can play the game until you have lost all your lives. 
For a more detailed documentation, consult the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267). ## Variants diff --git a/docs/environments/atari/kung_fu_master.md b/docs/environments/atari/kung_fu_master.md index 1820a92ce..3c54193c9 100644 --- a/docs/environments/atari/kung_fu_master.md +++ b/docs/environments/atari/kung_fu_master.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants KungFuMaster has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/laser_gates.md b/docs/environments/atari/laser_gates.md index f28dd9d9f..8a363d5fa 100644 --- a/docs/environments/atari/laser_gates.md +++ b/docs/environments/atari/laser_gates.md @@ -21,7 +21,7 @@ For more LaserGates variants with different observation and action spaces, see t ## Description -The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and deestroy the four Failsafe Detonators. +The Cryptic Computer is malfunctioning! Use your Dante Dart to navigate through the computer and destroy the four Failsafe Detonators. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=271) @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants LaserGates has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/lost_luggage.md b/docs/environments/atari/lost_luggage.md index 0c8419b2c..f9b46603c 100644 --- a/docs/environments/atari/lost_luggage.md +++ b/docs/environments/atari/lost_luggage.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants LostLuggage has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/mario_bros.md b/docs/environments/atari/mario_bros.md index 93b919c08..2a47c5c8b 100644 --- a/docs/environments/atari/mario_bros.md +++ b/docs/environments/atari/mario_bros.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants MarioBros has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/miniature_golf.md b/docs/environments/atari/miniature_golf.md index 8939fc114..59fe0b0ec 100644 --- a/docs/environments/atari/miniature_golf.md +++ b/docs/environments/atari/miniature_golf.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants MiniatureGolf has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/montezuma_revenge.md b/docs/environments/atari/montezuma_revenge.md index 42e4d978d..748c9681b 100644 --- a/docs/environments/atari/montezuma_revenge.md +++ b/docs/environments/atari/montezuma_revenge.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants MontezumaRevenge has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/mr_do.md b/docs/environments/atari/mr_do.md index c2292acbc..50900e745 100644 --- a/docs/environments/atari/mr_do.md +++ b/docs/environments/atari/mr_do.md @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants MrDo has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/ms_pacman.md b/docs/environments/atari/ms_pacman.md index 012dca5d0..716b39485 100644 --- a/docs/environments/atari/ms_pacman.md +++ b/docs/environments/atari/ms_pacman.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants MsPacman has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/name_this_game.md b/docs/environments/atari/name_this_game.md index 6daa449bf..ab825d665 100644 --- a/docs/environments/atari/name_this_game.md +++ b/docs/environments/atari/name_this_game.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants NameThisGame has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/othello.md b/docs/environments/atari/othello.md index edfaf4d89..9e0529617 100644 --- a/docs/environments/atari/othello.md +++ b/docs/environments/atari/othello.md @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Othello has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/pacman.md b/docs/environments/atari/pacman.md index 090700a4a..e593a66c9 100644 --- a/docs/environments/atari/pacman.md +++ b/docs/environments/atari/pacman.md @@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants Pacman has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/phoenix.md b/docs/environments/atari/phoenix.md index 464bbe8fc..6b693ede3 100644 --- a/docs/environments/atari/phoenix.md +++ b/docs/environments/atari/phoenix.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Phoenix has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/pitfall.md b/docs/environments/atari/pitfall.md index f5926ab18..bb1d7b54b 100644 --- a/docs/environments/atari/pitfall.md +++ b/docs/environments/atari/pitfall.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You get score points for collecting treasure, you lose points through some misfortunes like falling down a hole. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360). +## Rewards +You get score points for collecting treasure; you lose points through some misfortunes like falling down a hole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360). ## Variants diff --git a/docs/environments/atari/pitfall2.md b/docs/environments/atari/pitfall2.md index 76ac25e47..68c8b0915 100644 --- a/docs/environments/atari/pitfall2.md +++ b/docs/environments/atari/pitfall2.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Pitfall2 has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/pong.md b/docs/environments/atari/pong.md index 6582dc0dc..b58686e91 100644 --- a/docs/environments/atari/pong.md +++ b/docs/environments/atari/pong.md @@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587). +## Rewards +You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587). ## Variants diff --git a/docs/environments/atari/pooyan.md b/docs/environments/atari/pooyan.md index cfd585118..55c763d98 100644 --- a/docs/environments/atari/pooyan.md +++ b/docs/environments/atari/pooyan.md @@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -If you hit a balloon, wolf or stone with an arrow you score points.
-For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372). +## Rewards +If you hit a balloon, wolf or stone with an arrow, you score points. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372). ## Variants diff --git a/docs/environments/atari/private_eye.md b/docs/environments/atari/private_eye.md index 58a79fbad..d4c9e7db2 100644 --- a/docs/environments/atari/private_eye.md +++ b/docs/environments/atari/private_eye.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376). +## Rewards +You score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376). ## Variants diff --git a/docs/environments/atari/qbert.md b/docs/environments/atari/qbert.md index 9e5966ded..126451688 100644 --- a/docs/environments/atari/qbert.md +++ b/docs/environments/atari/qbert.md @@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL). +## Rewards +You score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL). ## Variants diff --git a/docs/environments/atari/riverraid.md b/docs/environments/atari/riverraid.md index 753beeb24..29a4d5718 100644 --- a/docs/environments/atari/riverraid.md +++ b/docs/environments/atari/riverraid.md @@ -21,7 +21,7 @@ For more Riverraid variants with different observation and action spaces, see th ## Description -You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards).You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots).The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score. +You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards). You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.
You lose a jet even when it collides with the river bank or one of the enemy objects (except fuel depots). The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409) @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - +## Rewards Score points are your only reward. You get score points each time you destroy an enemy object: - | Enemy Object | Score Points | |--------------|--------------| | Tanker | 30 | @@ -60,8 +58,7 @@ Score points are your only reward. You get score points each time you destroy an | Fuel Depot | 80 | | Jet | 100 | | Bridge | 500 | - -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409). +For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409). ## Variants diff --git a/docs/environments/atari/road_runner.md b/docs/environments/atari/road_runner.md index 993e56380..7121d3051 100644 --- a/docs/environments/atari/road_runner.md +++ b/docs/environments/atari/road_runner.md @@ -21,7 +21,7 @@ For more RoadRunner variants with different observation and action spaces, see t ## Description -You control the Road Runner(TM) in a race; you can control the direction to run in and times to jumps.The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert.The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff,or you get hit by a falling rock.You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote. +You control the Road Runner(TM) in a race; you can control the direction to run in and when to jump. The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert. The game begins with three lives. You lose a life when the coyote catches you, picks you up in a rocket, or shoots you with a cannon. You also lose a life when a truck hits you, you hit a land mine, you fall off a cliff, or you get hit by a falling rock. You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and destroying the coyote. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412) @@ -49,18 +49,15 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - +## Rewards Score points are your only reward. You get score points each time you: - | actions | points | |-------------------------------------------------------|--------| | eat a pile of birdseed | 100 | | eat steel shot | 100 | | get the coyote hit by a mine (cannonball, rock, etc.) | 200 | | get the coyote hit by a truck | 1000 | - -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412). +For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).
## Variants diff --git a/docs/environments/atari/robotank.md b/docs/environments/atari/robotank.md index fb863a709..285b11d06 100644 --- a/docs/environments/atari/robotank.md +++ b/docs/environments/atari/robotank.md @@ -21,7 +21,7 @@ For more Robotank variants with different observation and action spaces, see the ## Description -You control your Robot Tanks to destroy enemies and avoid enemy fire.Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed.The game begins with one active Robot Tank and three reserves.Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares).You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12. +You control your Robot Tanks to destroy enemies and avoid enemy fire. Game ends when all of your Robot Tanks are destroyed or all 12 enemy squadrons are destroyed. The game begins with one active Robot Tank and three reserves. Your Robot Tank may get lost when it is hit by enemy rocket fire - your video scrambles with static interference when this happens - or just become damaged - sensors report the damage by flashing on your control panel (look at V/C/R/T squares). You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416) @@ -49,15 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -The number of enemies destroyed is the only reward. - -A small tank appears at the top of your screen for each enemy - you destroy. A square with the number 12 appears each time a squadron of twelve enemies are - destroyed. - -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416). +## Rewards +The number of enemies destroyed is the only reward. A small tank appears at the top of your screen for each enemy you destroy. A square with the number 12 appears each time a squadron of twelve enemies is destroyed. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416). ## Variants diff --git a/docs/environments/atari/seaquest.md b/docs/environments/atari/seaquest.md index 32baf109a..57b1bb49d 100644 --- a/docs/environments/atari/seaquest.md +++ b/docs/environments/atari/seaquest.md @@ -21,7 +21,7 @@ For more Seaquest variants with different observation and action spaces, see the ## Description -You control a sub able to move in all directions and fire torpedoes.The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly.The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to yourbase. You can only have six reserve subs on the screen at one time.Your sub will explode if it collides with anything except your own divers.The sub has a limited amount of oxygen that decreases at a constant rate during the game.
When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well. +You control a sub able to move in all directions and fire torpedoes. The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly. The game begins with one sub and three waiting on the horizon. Each time you increase your score by 10,000 points, an extra sub will be delivered to your base. You can only have six reserve subs on the screen at one time. Your sub will explode if it collides with anything except your own divers. The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen tank is almost empty, you need to surface and if you don't do it in time, your sub will blow up and you'll lose one diver. Each time you're forced to surface, with less than six divers, you lose one diver as well. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424) @@ -49,22 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -Score points are your only reward. - -Blasting enemy sub and killer shark is worth -20 points. Every time you surface with six divers, the value of enemy subs -and killer sharks increases by 10, up to a maximum of 90 points each. - -Rescued divers start at 50 points each. Then, their point value increases by 50, every -time you surface, up to a maximum of 1000 points each. - -You'll be further rewarded with bonus points for all the oxygen you have remaining the -moment you surface. The more oxygen you have left, the more bonus points -you're given. - -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424). +## Rewards +Score points are your only reward. Blasting an enemy sub or killer shark is worth 20 points. Every time you surface with six divers, the value of enemy subs and killer sharks increases by 10, up to a maximum of 90 points each. Rescued divers start at 50 points each. Then, their point value increases by 50, every time you surface, up to a maximum of 1000 points each. You'll be further rewarded with bonus points for all the oxygen you have remaining the moment you surface. The more oxygen you have left, the more bonus points you're given. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424). ## Variants diff --git a/docs/environments/atari/sir_lancelot.md b/docs/environments/atari/sir_lancelot.md index 40fec5bcd..6a5b23279 100644 --- a/docs/environments/atari/sir_lancelot.md +++ b/docs/environments/atari/sir_lancelot.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default.
+ ## Variants SirLancelot has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/skiing.md b/docs/environments/atari/skiing.md index f76c027c3..ddc058bfc 100644 --- a/docs/environments/atari/skiing.md +++ b/docs/environments/atari/skiing.md @@ -21,7 +21,7 @@ For more Skiing variants with different observation and action spaces, see the v ## Description -You control a skier who can move sideways.The goal is to run through all gates (between the poles) in the fastest time.You are penalized five seconds for each gate you miss.If you hit a gate or a tree, your skier will jump back up and keep going. +You control a skier who can move sideways. The goal is to run through all gates (between the poles) in the fastest time. You are penalized five seconds for each gate you miss. If you hit a gate or a tree, your skier will jump back up and keep going. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434) @@ -45,11 +45,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -Seconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. - -For a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434). +## Rewards +Seconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=434), in particular the SLALOM RACING section. ## Variants diff --git a/docs/environments/atari/solaris.md b/docs/environments/atari/solaris.md index 70efcf09f..c70b5785d 100644 --- a/docs/environments/atari/solaris.md +++ b/docs/environments/atari/solaris.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450). +## Rewards +You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450). ## Variants diff --git a/docs/environments/atari/space_invaders.md b/docs/environments/atari/space_invaders.md index 4edc82452..c04020ce2 100644 --- a/docs/environments/atari/space_invaders.md +++ b/docs/environments/atari/space_invaders.md @@ -46,10 +46,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You gain points for destroying space invaders. The invaders in the back rows are worth more points. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460). +## Rewards +You gain points for destroying space invaders. The invaders in the back rows are worth more points.
For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460). ## Variants diff --git a/docs/environments/atari/space_war.md b/docs/environments/atari/space_war.md index 0d0c83001..f943726a2 100644 --- a/docs/environments/atari/space_war.md +++ b/docs/environments/atari/space_war.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants SpaceWar has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/star_gunner.md b/docs/environments/atari/star_gunner.md index 1b9fac37e..14e00552d 100644 --- a/docs/environments/atari/star_gunner.md +++ b/docs/environments/atari/star_gunner.md @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying enemies. You get bonus points for clearing a wave and a level. -For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html). +## Rewards +You score points for destroying enemies. You get bonus points for clearing a wave and a level. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html). ## Variants diff --git a/docs/environments/atari/superman.md b/docs/environments/atari/superman.md index 2cd3bbb03..ff5678501 100644 --- a/docs/environments/atari/superman.md +++ b/docs/environments/atari/superman.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Superman has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/surround.md b/docs/environments/atari/surround.md index 8ece7ed0d..d013e0274 100644 --- a/docs/environments/atari/surround.md +++ b/docs/environments/atari/surround.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Surround has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/tennis.md b/docs/environments/atari/tennis.md index 9e5649692..66d459d3f 100644 --- a/docs/environments/atari/tennis.md +++ b/docs/environments/atari/tennis.md @@ -21,7 +21,7 @@ For more Tennis variants with different observation and action spaces, see the v ## Description -You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis.The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match. +You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis. The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match. 
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555) @@ -49,10 +49,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -The scoring is as per the sport of tennis, played till one set. -For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555). +## Rewards +The scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see the [AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555). ## Variants diff --git a/docs/environments/atari/tetris.md b/docs/environments/atari/tetris.md index 80db410b2..56ca0e521 100644 --- a/docs/environments/atari/tetris.md +++ b/docs/environments/atari/tetris.md @@ -45,6 +45,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Tetris has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/tic_tac_toe_3d.md b/docs/environments/atari/tic_tac_toe_3d.md index 53343866e..bf9affa50 100644 --- a/docs/environments/atari/tic_tac_toe_3d.md +++ b/docs/environments/atari/tic_tac_toe_3d.md @@ -21,7 +21,7 @@ For more TicTacToe3D variants with different observation and action spaces, see ## Description -Players take turns placing their mark (an X or an O) on a 3-dimensional, 4 x 4 x 4 grid in an attempt to get 4 in a row before their opponent does. +Players take turns placing their mark (an X or an O) on a 3-dimensional, 4x4x4 grid in an attempt to get 4 in a row before their opponent does. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=521) @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants TicTacToe3D has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/time_pilot.md b/docs/environments/atari/time_pilot.md index f215db8b7..ced2d2ebc 100644 --- a/docs/environments/atari/time_pilot.md +++ b/docs/environments/atari/time_pilot.md @@ -48,10 +48,8 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. -### Rewards - -You score points for destroying enemies, gaining more points for difficult enemies. -For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html). +## Rewards +You score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html). 
## Variants diff --git a/docs/environments/atari/trondead.md b/docs/environments/atari/trondead.md index 5f2f64e53..5f914a539 100644 --- a/docs/environments/atari/trondead.md +++ b/docs/environments/atari/trondead.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Trondead has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/turmoil.md b/docs/environments/atari/turmoil.md index f7383c7e3..3738406f4 100644 --- a/docs/environments/atari/turmoil.md +++ b/docs/environments/atari/turmoil.md @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Turmoil has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/tutankham.md b/docs/environments/atari/tutankham.md index 91bf4e411..fd45478bf 100644 --- a/docs/environments/atari/tutankham.md +++ b/docs/environments/atari/tutankham.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Tutankham has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/up_n_down.md b/docs/environments/atari/up_n_down.md index 8646c2c75..3d28fa900 100644 --- a/docs/environments/atari/up_n_down.md +++ b/docs/environments/atari/up_n_down.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants UpNDown has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/venture.md b/docs/environments/atari/venture.md index 139fb8081..493e19c6a 100644 --- a/docs/environments/atari/venture.md +++ b/docs/environments/atari/venture.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants Venture has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/video_checkers.md b/docs/environments/atari/video_checkers.md index 1e8f3fa32..ee0aeb9be 100644 --- a/docs/environments/atari/video_checkers.md +++ b/docs/environments/atari/video_checkers.md @@ -47,6 +47,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants VideoCheckers has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/video_chess.md b/docs/environments/atari/video_chess.md index a0e446038..b8b15c8c7 100644 --- a/docs/environments/atari/video_chess.md +++ b/docs/environments/atari/video_chess.md @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants VideoChess has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/video_cube.md b/docs/environments/atari/video_cube.md index ebdae5d3a..59ba9465d 100644 --- a/docs/environments/atari/video_cube.md +++ b/docs/environments/atari/video_cube.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants VideoCube has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/video_pinball.md b/docs/environments/atari/video_pinball.md index c38724da0..fa01f8328 100644 --- a/docs/environments/atari/video_pinball.md +++ b/docs/environments/atari/video_pinball.md @@ -48,6 +48,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants VideoPinball has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/wizard_of_wor.md b/docs/environments/atari/wizard_of_wor.md index 6141b7d1c..804729346 100644 --- a/docs/environments/atari/wizard_of_wor.md +++ b/docs/environments/atari/wizard_of_wor.md @@ -49,6 +49,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants WizardOfWor has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/word_zapper.md b/docs/environments/atari/word_zapper.md index b10230d6c..d2e9f0a9c 100644 --- a/docs/environments/atari/word_zapper.md +++ b/docs/environments/atari/word_zapper.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants WordZapper has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/yars_revenge.md b/docs/environments/atari/yars_revenge.md index f35d6058b..d9b68b35c 100644 --- a/docs/environments/atari/yars_revenge.md +++ b/docs/environments/atari/yars_revenge.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. + ## Variants YarsRevenge has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/atari/zaxxon.md b/docs/environments/atari/zaxxon.md index 287d6b735..1c5df6e5e 100644 --- a/docs/environments/atari/zaxxon.md +++ b/docs/environments/atari/zaxxon.md @@ -50,6 +50,7 @@ Atari environments have three possible observation types: `"rgb"`, `"grayscale"` See variants section for the type of observation used by each environment id by default. 
+ ## Variants Zaxxon has the following variants of the environment id which have the following differences in observation, diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md index c55eacfd9..fa311901f 100644 --- a/docs/environments/mujoco.md +++ b/docs/environments/mujoco.md @@ -79,15 +79,22 @@ Gymnasium includes the following versions of the environments: | ------- | --------------- | ------------------------------------------------ | | `v5` | `mujoco=>2.3.3` | Recommended (most features, the least bugs) | | `v4` | `mujoco=>2.1.3` | Maintained for reproducibility | -| `v3` | `mujoco-py` | Maintained for reproducibility (limited support) | -| `v2` | `mujoco-py` | Maintained for reproducibility (limited support) | +| `v3` | `mujoco-py` | Deprecated, kept for reproducibility (limited support) | +| `v2` | `mujoco-py` | Deprecated, kept for reproducibility (limited support) | For more information, see the section "Version History" for each environment. `v1` and older are no longer included in Gymnasium. -Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information of their [Documentation]( https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)) +### Comparing training performance across versions +The training performance of `v2` and `v3` is identical assuming the same/default arguments were used. +The training performance of `v2`/`v3` and `v4` is not directly comparable because of the change to the newer simulator, but the results for environments other than Ant and Humanoid are comparable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)). + +The training performance of `v4` and `v5` differs because of the many changes to the environments, but Half Cheetah and Swimmer exhibit identical behavior, and Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). + +### Exact reproducibility +Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information in their [Documentation](https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)). If exact reproducibility is needed, then besides using the `seed` for experiments, the same simulator version should be used. ## Rendering Arguments The all MuJoCo Environments besides the general Gymnasium arguments, and environment specific arguments they also take the following arguments for configuring the renderer: diff --git a/docs/tutorials/gymnasium_basics/load_quadruped_model.md b/docs/tutorials/gymnasium_basics/load_quadruped_model.md new file mode 100644 index 000000000..d30e85bf0 --- /dev/null +++ b/docs/tutorials/gymnasium_basics/load_quadruped_model.md @@ -0,0 +1,247 @@ +Load custom quadruped robot environments +================================ + +In this tutorial we will see how to use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment, using a model file (ending in `.xml`) without having to create a new class. + +Steps: + +0. Get your **MJCF** (or **URDF**) model file of your robot.
+ - Create your own model (see the [Guide](https://mujoco.readthedocs.io/en/stable/modeling.html)) or, + - Find a ready-made model (in this tutorial, we will use a model from the [**MuJoCo Menagerie**](https://github.com/google-deepmind/mujoco_menagerie) collection). +1. Load the model with the `xml_file` argument. +2. Tweak the environment parameters to get the desired behavior. + 1. Tweak the environment simulation parameters. + 2. Tweak the environment termination parameters. + 3. Tweak the environment reward parameters. + 4. Tweak the environment observation parameters. +3. Train an agent to move your robot. + + +The reader is expected to be familiar with the `Gymnasium` API & library, the basics of robotics, and the included `Gymnasium/MuJoCo` environments and the robot models they use. Familiarity with the **MJCF** file model format and the `MuJoCo` simulator is not required but is recommended. + +Setup +------ +We will need `gymnasium>=1.0.0`. + +```sh +pip install "gymnasium>=1.0.0" +``` + +Step 0.1 - Download a Robot Model +------------------------- +In this tutorial we will load the [Unitree Go1](https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/README.md) robot from the excellent [MuJoCo Menagerie](https://github.com/google-deepmind/mujoco_menagerie) robot model collection. +![Unitree Go1 robot in a flat terrain scene](https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/go1.png?raw=true) + +`Go1` is a quadruped robot; controlling it to move is a significant learning problem, much harder than the `Gymnasium/MuJoCo/Ant` environment. + +We can download the whole MuJoCo Menagerie collection (which includes `Go1`): +```sh +git clone https://github.com/google-deepmind/mujoco_menagerie.git +``` +You can use any other quadruped robot with this tutorial; just adjust the environment parameter values for your robot. + + +Step 1 - Load the model +------------------------- +To load the model, all we have to do is use the `xml_file` argument with the `Ant-v5` framework. + +```py +import gymnasium +import numpy as np +env = gymnasium.make('Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml') +``` + +Although this is enough to load the model, we will need to tweak some environment parameters to get the desired behavior for our environment. For now, we will also explicitly set the simulation, termination, reward and observation arguments, which we will tweak in the next step. + +```py +env = gymnasium.make( + 'Ant-v5', + xml_file='./mujoco_menagerie/unitree_go1/scene.xml', + forward_reward_weight=0, + ctrl_cost_weight=0, + contact_cost_weight=0, + healthy_reward=0, + main_body=1, + healthy_z_range=(0, np.inf), + include_cfrc_ext_in_observation=True, + exclude_current_positions_from_observation=False, + reset_noise_scale=0, + frame_skip=1, + max_episode_steps=1000, +) +``` + + +Step 2 - Tweaking the Environment Parameters +------------------------- +Tweaking the environment parameters is essential to get the desired behavior for learning. +In the following subsections, the reader is encouraged to consult the [documentation of the arguments](https://gymnasium.farama.org/main/environments/mujoco/ant/#arguments) for more detailed information. + + +Step 2.1 - Tweaking the Environment Simulation Parameters +------------------------- +The arguments of interest are `frame_skip`, `reset_noise_scale` and `max_episode_steps`.
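+The next paragraphs pick `frame_skip` based on the model's integrator time step; if you want to check that value for your own robot, here is a minimal sketch, assuming the `mujoco` Python bindings are installed and the Menagerie checkout from Step 0.1:
+
+```py
+import mujoco
+
+# Load the Go1 scene directly with the MuJoCo bindings to inspect its settings.
+model = mujoco.MjModel.from_xml_path('./mujoco_menagerie/unitree_go1/scene.xml')
+print(model.opt.timestep)       # integrator time step from the MJCF file (0.002 for Go1)
+print(25 * model.opt.timestep)  # resulting dt for frame_skip=25 -> 0.05s
+```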
+ +We want to tweak the `frame_skip` parameter to get `dt` to an acceptable value (typical values are `dt` $\in [0.01, 0.1]$ seconds). + +Reminder: $dt = frame\_skip \times model.opt.timestep$, where `model.opt.timestep` is the integrator time step selected in the MJCF model file. + +The `Go1` model we are using has an integrator timestep of `0.002`, so by selecting `frame_skip=25` we can set the value of `dt` to `0.05s`. + +To avoid overfitting the policy, `reset_noise_scale` should be set to a value appropriate to the size of the robot: we want the value to be as large as possible without the initial distribution of states being invalid (`Terminal` regardless of control actions). For `Go1` we choose a value of `0.1`. + +`max_episode_steps` determines the number of steps per episode before `truncation`; here we set it to 1000 to be consistent with the base `Gymnasium/MuJoCo` environments, but you can set it higher if needed. + + +```py env = gymnasium.make( 'Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml', forward_reward_weight=0, ctrl_cost_weight=0, contact_cost_weight=0, healthy_reward=0, main_body=1, healthy_z_range=(0, np.inf), include_cfrc_ext_in_observation=True, exclude_current_positions_from_observation=False, reset_noise_scale=0.1, # set to avoid policy overfitting frame_skip=25, # set dt=0.05 max_episode_steps=1000, # kept at 1000 ) ``` + + Step 2.2 - Tweaking the Environment Termination Parameters ------------------------- Termination is important for robot environments to avoid sampling "useless" time steps. + +The arguments of interest are `terminate_when_unhealthy` and `healthy_z_range`. + +We want to set `healthy_z_range` to terminate the environment when the robot falls over or jumps really high. Here we have to choose a value that is logical for the height of the robot; for `Go1` we choose `(0.195, 0.75)`. Note: `healthy_z_range` checks the absolute value of the height of the robot, so if your scene contains different levels of elevation it should be set to `(-np.inf, np.inf)`. + +We could also set `terminate_when_unhealthy=False` to disable termination altogether, which is not desirable in the case of `Go1`. + +```py env = gymnasium.make( 'Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml', forward_reward_weight=0, ctrl_cost_weight=0, contact_cost_weight=0, healthy_reward=0, main_body=1, healthy_z_range=(0.195, 0.75), # set to avoid sampling steps where the robot has fallen or jumped too high include_cfrc_ext_in_observation=True, exclude_current_positions_from_observation=False, reset_noise_scale=0.1, frame_skip=25, max_episode_steps=1000, ) ``` + +Note: If you need a different termination condition, you can write your own `TerminationWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/)). + + + Step 2.3 - Tweaking the Environment Reward Parameters ------------------------- The arguments of interest are `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight`, `healthy_reward`, and `main_body`. + +For the arguments `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight` and `healthy_reward` we have to pick values that make sense for our robot. You can use the default `MuJoCo/Ant` parameters for reference and tweak them if a change is needed for your environment. In the case of `Go1` we only change the `ctrl_cost_weight`, since it has a higher actuator force range (a quick way to inspect this is sketched below).
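One way to check the actuator force ranges (a minimal sketch, assuming the `mujoco` Python bindings are installed and the Menagerie checkout from Step 0.1; the path is illustrative) is to load the model directly and print the control ranges it declares:

```py
import mujoco

# Load the Go1 model (path from Step 0.1) and print the per-actuator
# control ranges; wider ranges than Ant's suggest lowering ctrl_cost_weight.
model = mujoco.MjModel.from_xml_path('./mujoco_menagerie/unitree_go1/scene.xml')
print(model.actuator_ctrlrange)
```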
+ +For the argument `main_body` we have to choose which body part is the main body (usually called something like "torso" or "trunk" in the model file) for the calculation of the `forward_reward`; in the case of `Go1` it is the `"trunk"` (Note: in most cases, including this one, it can be left at the default value). + +```py env = gymnasium.make( 'Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml', forward_reward_weight=1, # kept the same as the 'Ant' environment ctrl_cost_weight=0.05, # changed because of the stronger motors of `Go1` contact_cost_weight=5e-4, # kept the same as the 'Ant' environment healthy_reward=1, # kept the same as the 'Ant' environment main_body=1, # represents the "trunk" of the `Go1` robot healthy_z_range=(0.195, 0.75), include_cfrc_ext_in_observation=True, exclude_current_positions_from_observation=False, reset_noise_scale=0.1, frame_skip=25, max_episode_steps=1000, ) ``` + +Note: If you need a different reward function, you can write your own `RewardWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/reward_wrappers/)). + + + Step 2.4 - Tweaking the Environment Observation Parameters ------------------------- The arguments of interest are `include_cfrc_ext_in_observation` and `exclude_current_positions_from_observation`. + +Here for `Go1` we have no particular reason to change them. + +```py env = gymnasium.make( 'Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml', forward_reward_weight=1, ctrl_cost_weight=0.05, contact_cost_weight=5e-4, healthy_reward=1, main_body=1, healthy_z_range=(0.195, 0.75), include_cfrc_ext_in_observation=True, # kept the same as the 'Ant' environment exclude_current_positions_from_observation=False, # kept the same as the 'Ant' environment reset_noise_scale=0.1, frame_skip=25, max_episode_steps=1000, ) ``` + + +Note: If you need additional observation elements (such as additional sensors), you can write your own `ObservationWrapper` (see the [documentation](https://gymnasium.farama.org/main/api/wrappers/observation_wrappers/)). + + + Step 3 - Train your Agent ------------------------- Finally, we are done; we can use an RL algorithm to train an agent to walk/run the `Go1` robot (a short random-rollout sanity check is sketched after the epilogue). Note: If you have followed this guide with your own robot model, you may discover during training that some environment parameters were not as desired; feel free to go back to step 2 and change anything as needed. + +```py import gymnasium + env = gymnasium.make( 'Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml', forward_reward_weight=1, ctrl_cost_weight=0.05, contact_cost_weight=5e-4, healthy_reward=1, main_body=1, healthy_z_range=(0.195, 0.75), include_cfrc_ext_in_observation=True, exclude_current_positions_from_observation=False, reset_noise_scale=0.1, frame_skip=25, max_episode_steps=1000, ) ... # run your RL algorithm ``` ![image](https://github.com/Kallinteris-Andreas/Gymnasium-kalli/assets/30759571/bf1797a3-264d-47de-b14c-e3c16072f695) + + + + + Epilogue ------------------------- You can follow this guide to create most quadruped environments. To create humanoid/bipedal robots, you can also follow this guide using the `Gymnasium/MuJoCo/Humanoid-v5` framework.
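Before plugging in a learning algorithm, it can help to sanity-check the finished environment by rolling out random actions (a minimal sketch, assuming the Menagerie checkout and the parameter values chosen above):

```py
import gymnasium

# Build the tuned Go1 environment and step it with random actions to confirm
# that reset, step, and the termination bounds behave as configured.
env = gymnasium.make(
    'Ant-v5',
    xml_file='./mujoco_menagerie/unitree_go1/scene.xml',
    ctrl_cost_weight=0.05,
    healthy_z_range=(0.195, 0.75),
    reset_noise_scale=0.1,
    frame_skip=25,
    max_episode_steps=1000,
)
obs, info = env.reset(seed=0)
for _ in range(1000):
    action = env.action_space.sample()  # random policy, for checking only
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```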
+ +Author: [@kallinteris-andreas](https://github.com/Kallinteris-Andreas) diff --git a/docs/tutorials/third-party-tutorials.md b/docs/tutorials/third-party-tutorials.md index 2883cb75e..5fc792957 100644 --- a/docs/tutorials/third-party-tutorials.md +++ b/docs/tutorials/third-party-tutorials.md @@ -1,7 +1,16 @@ - # Third-Party Tutorials -## [AgileRL](https://docs.agilerl.com/en/latest/tutorials/gymnasium/index.html) + +*This page contains tutorials which are not maintained by Farama Foundation and, as such, cannot be guaranteed to function as intended.* + +*If you'd like to contribute a tutorial, please reach out on [Discord](https://discord.gg/bnJ6kubTg6).* + + +## Third-Party tutorials about gymnasium + +## Third-Party tutorials about libraries that use gymnasium + +### [AgileRL](https://docs.agilerl.com/en/latest/tutorials/gymnasium/index.html) AgileRL focuses on reducing the time taken for training models and hyperparameter optimisation (HPO), providing tutorials for using it with PPO, TD3 and Rainbow. diff --git a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py index 355699b75..3312f2873 100644 --- a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py +++ b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py @@ -33,11 +33,12 @@ ``env.step(A)`` allows us to take an action 'A' in the current environment 'env'. The environment then executes the action and returns five variables: -- ``next_obs``: This is the observation that the agent will receive after taking the action. -- ``reward``: This is the reward that the agent will receive after taking the action. -- ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated. -- ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached. -- ``info``: This is a dictionary that might contain additional information about the environment. + - ``next_obs``: This is the observation that the agent will receive after taking the action. + - ``reward``: This is the reward that the agent will receive after taking the action. + - ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated. + - ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached. + - ``info``: This is a dictionary that might contain additional information about the environment. + """ from __future__ import annotations diff --git a/gymnasium/__init__.py b/gymnasium/__init__.py index 8f1e28ad4..21218046c 100644 --- a/gymnasium/__init__.py +++ b/gymnasium/__init__.py @@ -52,7 +52,7 @@ "logger", "functional", ] -__version__ = "1.0.0rc1" +__version__ = "1.0.0a1" # Initializing pygame initializes audio connections through SDL. SDL uses alsa by default on all Linux systems diff --git a/gymnasium/core.py b/gymnasium/core.py index e7bea4aca..304dad709 100644 --- a/gymnasium/core.py +++ b/gymnasium/core.py @@ -66,6 +66,8 @@ class Env(Generic[ObsType, ActType]): # Created _np_random: np.random.Generator | None = None + # will be set to the "invalid" value -1 if the seed of the currently set rng is unknown + _np_random_seed: int | None = None def step( self, action: ActType @@ -90,7 +92,7 @@ def step( reward (SupportsFloat): The reward as a result of taking the action.
terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task) which can be positive or negative. An example is reaching the goal state or moving into the lava from - the Sutton and Barton, Gridworld. If true, the user needs to call :meth:`reset`. + the Sutton and Barto Gridworld. If true, the user needs to call :meth:`reset`. truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied. Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds. Can be used to end the episode prematurely before a terminal state is reached. @@ -130,10 +132,12 @@ def reset( The ``return_info`` parameter was removed and now info is expected to be returned. Args: - seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`). + seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`) and + the read-only attribute `np_random_seed`. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). - However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. + However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset + and the env's :attr:`np_random_seed` will *not* be altered. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action. @@ -148,7 +152,7 @@ def reset( """ # Initialize the RNG if the seed is manually passed if seed is not None: - self._np_random, seed = seeding.np_random(seed) + self._np_random, self._np_random_seed = seeding.np_random(seed) def render(self) -> RenderFrame | list[RenderFrame] | None: """Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment. @@ -201,6 +205,20 @@ def unwrapped(self) -> Env[ObsType, ActType]: """ return self + @property + def np_random_seed(self) -> int: + """Returns the environment's internal :attr:`_np_random_seed` that if not set will first initialise with a random int as seed. + + If :attr:`np_random_seed` was set directly instead of through :meth:`reset` or :meth:`set_np_random_through_seed`, + the seed will take the value -1. + + Returns: + int: the seed of the current `np_random` or -1, if the seed of the rng is unknown + """ + if self._np_random_seed is None: + self._np_random, self._np_random_seed = seeding.np_random() + return self._np_random_seed + @property def np_random(self) -> np.random.Generator: """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed. @@ -209,12 +227,20 @@ def np_random(self) -> np.random.Generator: Instances of `np.random.Generator` """ if self._np_random is None: - self._np_random, _ = seeding.np_random() + self._np_random, self._np_random_seed = seeding.np_random() return self._np_random @np_random.setter def np_random(self, value: np.random.Generator): + """Sets the environment's internal :attr:`_np_random` with the user-provided Generator. 
+ + Since it is generally not possible to extract a seed from an instance of a random number generator, + this will also set the :attr:`_np_random_seed` to `-1`, which is not valid as input for the creation + of a numpy rng. + """ self._np_random = value + # Setting a numpy rng with -1 will cause a ValueError + self._np_random_seed = -1 def __str__(self): """Returns a string of the environment with :attr:`spec` id's if :attr:`spec. @@ -303,6 +329,11 @@ def close(self): """Closes the wrapper and :attr:`env`.""" return self.env.close() + @property + def np_random_seed(self) -> int | None: + """Returns the base environment's :attr:`np_random_seed`.""" + return self.env.np_random_seed + @property def unwrapped(self) -> Env[ObsType, ActType]: """Returns the base environment of the wrapper. diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 14ad83164..ae4dd7b0b 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -142,11 +142,15 @@ class BipedalWalker(gym.Env, EzPickle): if the walker exceeds the right end of the terrain length. ## Arguments - To use the _hardcore_ environment, you need to specify the - `hardcore=True` argument like below: + + To use the _hardcore_ environment, you need to specify `hardcore=True`: + ```python - import gymnasium as gym - env = gym.make("BipedalWalker-v3", hardcore=True) + >>> import gymnasium as gym + >>> env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array") + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<BipedalWalker<BipedalWalker-v3>>>>> + ``` ## Version History @@ -778,13 +782,7 @@ def __init__(self): ) -if __name__ == "__main__": - # Heurisic: suboptimal, have no notion of balance. - env = BipedalWalker() - env.reset() - steps = 0 - total_reward = 0 - a = np.array([0.0, 0.0, 0.0, 0.0]) +class BipedalWalkerHeuristics: STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3 SPEED = 0.29 # Will fall forward on higher speed state = STAY_ON_ONE_LEG @@ -792,51 +790,45 @@ def __init__(self): supporting_leg = 1 - moving_leg SUPPORT_KNEE_ANGLE = +0.1 supporting_knee_angle = SUPPORT_KNEE_ANGLE - while True: - s, r, terminated, truncated, info = env.step(a) - total_reward += r - if steps % 20 == 0 or terminated or truncated: - print("\naction " + str([f"{x:+0.2f}" for x in a])) - print(f"step {steps} total_reward {total_reward:+0.2f}") - print("hull " + str([f"{x:+0.2f}" for x in s[0:4]])) - print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]])) - print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]])) - steps += 1 + a = np.array([0.0, 0.0, 0.0, 0.0]) - contact0 = s[8] - contact1 = s[13] - moving_s_base = 4 + 5 * moving_leg - supporting_s_base = 4 + 5 * supporting_leg + def step_heuristic(self, s): + moving_s_base = 4 + 5 * self.moving_leg + supporting_s_base = 4 + 5 * self.supporting_leg hip_targ = [None, None] # -0.8 .. +1.1 knee_targ = [None, None] # -0.6 ..
+0.9 hip_todo = [0.0, 0.0] knee_todo = [0.0, 0.0] - if state == STAY_ON_ONE_LEG: - hip_targ[moving_leg] = 1.1 - knee_targ[moving_leg] = -0.6 - supporting_knee_angle += 0.03 - if s[2] > SPEED: - supporting_knee_angle += 0.03 - supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE) - knee_targ[supporting_leg] = supporting_knee_angle + if self.state == self.STAY_ON_ONE_LEG: + hip_targ[self.moving_leg] = 1.1 + knee_targ[self.moving_leg] = -0.6 + self.supporting_knee_angle += 0.03 + if s[2] > self.SPEED: + self.supporting_knee_angle += 0.03 + self.supporting_knee_angle = min( + self.supporting_knee_angle, self.SUPPORT_KNEE_ANGLE + ) + knee_targ[self.supporting_leg] = self.supporting_knee_angle if s[supporting_s_base + 0] < 0.10: # supporting leg is behind - state = PUT_OTHER_DOWN - if state == PUT_OTHER_DOWN: - hip_targ[moving_leg] = +0.1 - knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE - knee_targ[supporting_leg] = supporting_knee_angle + self.state = self.PUT_OTHER_DOWN + if self.state == self.PUT_OTHER_DOWN: + hip_targ[self.moving_leg] = +0.1 + knee_targ[self.moving_leg] = self.SUPPORT_KNEE_ANGLE + knee_targ[self.supporting_leg] = self.supporting_knee_angle if s[moving_s_base + 4]: - state = PUSH_OFF - supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE) - if state == PUSH_OFF: - knee_targ[moving_leg] = supporting_knee_angle - knee_targ[supporting_leg] = +1.0 - if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED: - state = STAY_ON_ONE_LEG - moving_leg = 1 - moving_leg - supporting_leg = 1 - moving_leg + self.state = self.PUSH_OFF + self.supporting_knee_angle = min( + s[moving_s_base + 2], self.SUPPORT_KNEE_ANGLE + ) + if self.state == self.PUSH_OFF: + knee_targ[self.moving_leg] = self.supporting_knee_angle + knee_targ[self.supporting_leg] = +1.0 + if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * self.SPEED: + self.state = self.STAY_ON_ONE_LEG + self.moving_leg = 1 - self.moving_leg + self.supporting_leg = 1 - self.moving_leg if hip_targ[0]: hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5] @@ -852,11 +844,35 @@ def __init__(self): knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations knee_todo[1] -= 15.0 * s[3] - a[0] = hip_todo[0] - a[1] = knee_todo[0] - a[2] = hip_todo[1] - a[3] = knee_todo[1] - a = np.clip(0.5 * a, -1.0, 1.0) + self.a[0] = hip_todo[0] + self.a[1] = knee_todo[0] + self.a[2] = hip_todo[1] + self.a[3] = knee_todo[1] + self.a = np.clip(0.5 * self.a, -1.0, 1.0) + + return self.a + + +if __name__ == "__main__": + env = BipedalWalker(render_mode="human") + env.reset() + steps = 0 + total_reward = 0 + a = np.array([0.0, 0.0, 0.0, 0.0]) + # Heurisic: suboptimal, have no notion of balance. + heuristics = BipedalWalkerHeuristics() + while True: + s, r, terminated, truncated, info = env.step(a) + total_reward += r + if steps % 20 == 0 or terminated or truncated: + print("\naction " + str([f"{x:+0.2f}" for x in a])) + print(f"step {steps} total_reward {total_reward:+0.2f}") + print("hull " + str([f"{x:+0.2f}" for x in s[0:4]])) + print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]])) + print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]])) + steps += 1 + + a = heuristics.step_heuristic(s) if terminated or truncated: break diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py index 5044bb400..6563b872f 100644 --- a/gymnasium/envs/box2d/car_racing.py +++ b/gymnasium/envs/box2d/car_racing.py @@ -115,7 +115,7 @@ class CarRacing(gym.Env, EzPickle): state RGB buffer. 
From left to right: true speed, four ABS sensors, steering wheel position, and gyroscope. To play yourself (it's rather fast for humans), type: - ``` + ```shell python gymnasium/envs/box2d/car_racing.py ``` Remember: it's a powerful rear-wheel drive car - don't press the accelerator @@ -139,46 +139,54 @@ A top-down 96x96 RGB image of the car and race track. ## Rewards - The reward is -0.1 every frame and +1000/N for every track tile visited, - where N is the total number of tiles visited in the track. For example, - if you have finished in 732 frames, your reward is - 1000 - 0.1*732 = 926.8 points. + The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles + visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points. ## Starting State The car starts at rest in the center of the road. ## Episode Termination - The episode finishes when all the tiles are visited. The car can also go - outside the playfield - that is, far off the track, in which case it will - receive -100 reward and die. + The episode finishes when all the tiles are visited. The car can also go outside the playfield - + that is, far off the track, in which case it will receive -100 reward and die. ## Arguments - `lap_complete_percent` dictates the percentage of tiles that must be visited by - the agent before a lap is considered complete. - Passing `domain_randomize=True` enables the domain randomized variant of the environment. - In this scenario, the background and track colours are different on every reset. + ```python + >>> import gymnasium as gym + >>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False) + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>> + + ``` + + * `lap_complete_percent=0.95` dictates the percentage of tiles that must be visited by + the agent before a lap is considered complete. - Passing `continuous=False` converts the environment to use discrete action space. - The discrete action space has 5 actions: [do nothing, left, right, gas, brake]. + * `domain_randomize=False` disables the domain randomized variant of the environment; when it is enabled, + the background and track colours are different on every reset. + + * `continuous=False` converts the environment to use discrete action space. + The discrete action space has 5 actions: [do nothing, left, right, gas, brake]. ## Reset Arguments + Passing the option `options["randomize"] = True` will change the current colour of the environment on demand. Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment. `domain_randomize` must be `True` on init for this argument to work.
- Example usage: + ```python - import gymnasium as gym - env = gym.make("CarRacing-v1", domain_randomize=True) + >>> import gymnasium as gym + >>> env = gym.make("CarRacing-v2", domain_randomize=True) # normal reset, this changes the colour scheme by default - env.reset() + >>> obs, _ = env.reset() # reset with colour scheme change - env.reset(options={"randomize": True}) + >>> randomize_obs, _ = env.reset(options={"randomize": True}) # reset with no colour scheme change - env.reset(options={"randomize": False}) + >>> non_random_obs, _ = env.reset(options={"randomize": False}) + ``` ## Version History diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 0d6682443..4e3790bac 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -93,7 +93,7 @@ class LunarLander(gym.Env, EzPickle): can learn to fly and then land on its first attempt. To see a heuristic landing, run: - ``` + ```shell python gymnasium/envs/box2d/lunar_lander.py ``` @@ -145,74 +145,60 @@ > them is destroyed. ## Arguments - To use the _continuous_ environment, you need to specify the - `continuous=True` argument like below: + + Lunar Lander has a large number of arguments: + ```python + >>> import gymnasium as gym + >>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0, + ... enable_wind=False, wind_power=15.0, turbulence_power=1.5) + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>> + ``` - If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the - action space will be `Box(-1, +1, (2,), dtype=np.float32)`. - The first coordinate of an action determines the throttle of the main engine, while the second - coordinate specifies the throttle of the lateral boosters. - Given an action `np.array([main, lateral])`, the main engine will be turned off completely if - `main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the - main engine doesn't work with less than 50% power). - Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left - booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely - from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively). - - `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12. - - If `enable_wind=True` is passed, there will be wind effects applied to the lander. - The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`. - `k` is set to 0.01. - `C` is sampled randomly between -9999 and 9999. - - `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0. - `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0. + + * `continuous` determines if discrete or continuous actions (corresponding to the throttle of the engines) will be used with the + action space being `Discrete(4)` or `Box(-1, +1, (2,), dtype=np.float32)` respectively.
+ For continuous actions, the first coordinate of an action determines the throttle of the main engine, while the second + coordinate specifies the throttle of the lateral boosters. Given an action `np.array([main, lateral])`, the main + engine will be turned off completely if `main < 0` and the throttle scales affinely from 50% to 100% for + `0 <= main <= 1` (in particular, the main engine doesn't work with less than 50% power). + Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left + booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely + from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively). + + * `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12. Default is -10.0 + + * `enable_wind` determines if there will be wind effects applied to the lander. The wind is generated using + the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))` where `k` is set to 0.01 and `C` is sampled randomly between -9999 and 9999. + + * `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for + `wind_power` is between 0.0 and 20.0. + + * `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. + The recommended value for `turbulence_power` is between 0.0 and 2.0. ## Version History - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters - - v1: Legs contact with ground added in state vector; contact with ground - give +10 reward points, and -10 if then lose contact; reward - renormalized to 200; harder initial random push. + - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points, + and -10 if then lose contact; reward renormalized to 200; harder initial random push. - v0: Initial version - ## Notes There are several unexpected bugs with the implementation of the environment. - 1. The position of the side thursters on the body of the lander changes, depending on the orientation of the lander. - This in turn results in an orientation depentant torque being applied to the lander. + 1. The position of the side thrusters on the body of the lander changes, depending on the orientation of the lander. + This in turn results in an orientation dependent torque being applied to the lander. 2. The units of the state are not consistent. I.e. * The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5. 
For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal: - 'x': 10 - 'y': 6.666 - 'vx': 5 - 'vy': 7.5 - 'angle': 1 - 'angular velocity': 2.5 + 'x': 10, 'y': 6.666, 'vx': 5, 'vy': 7.5, 'angle': 1, 'angular velocity': 2.5 After the correction has been made, the units of the state are as follows: - 'x': (units) - 'y': (units) - 'vx': (units/second) - 'vy': (units/second) - 'angle': (radians) - 'angular velocity': (radians/second) - + 'x': (units), 'y': (units), 'vx': (units/second), 'vy': (units/second), 'angle': (radians), 'angular velocity': (radians/second) diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py index 75f9cd465..1700b93ab 100644 --- a/gymnasium/envs/classic_control/acrobot.py +++ b/gymnasium/envs/classic_control/acrobot.py @@ -96,15 +96,19 @@ class AcrobotEnv(Env): ## Arguments - No additional arguments are currently supported during construction. + Acrobot only has `render_mode` as a keyword for `gymnasium.make`. + On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. ```python - import gymnasium as gym - env = gym.make('Acrobot-v1') - ``` + >>> import gymnasium as gym + >>> env = gym.make('Acrobot-v1', render_mode="rgb_array") + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<AcrobotEnv<Acrobot-v1>>>>> + >>> env.reset(seed=123, options={"low": -0.2, "high": 0.2}) # default low=-0.1, high=0.1 + (array([ 0.997341 , 0.07287608, 0.9841162 , -0.17752565, -0.11185605, + -0.12625128], dtype=float32), {}) - On reset, the `options` parameter allows the user to change the bounds used to determine - the new random state. + ``` By default, the dynamics of the acrobot follow those described in Sutton and Barto's book [Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html). @@ -118,20 +122,17 @@ See the following note for details: - > The dynamics equations were missing some terms in the NIPS paper which - are present in the book. R. Sutton confirmed in personal correspondence - that the experimental results shown in the paper and the book were - generated with the equations shown in the book. - However, there is the option to run the domain with the paper equations - by setting `book_or_nips = 'nips'` - + > The dynamics equations were missing some terms in the NIPS paper which are present in the book. + R. Sutton confirmed in personal correspondence that the experimental results shown in the paper and the book were + generated with the equations shown in the book. However, there is the option to run the domain with the paper equations + by setting `book_or_nips = 'nips'` ## Version History - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the sine and cosine of each angle instead. - - v0: Initial versions release (1.0.0) (removed from gymnasium for v1) + - v0: Initial versions release ## References - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
@@ -383,8 +384,8 @@ def close(self): def wrap(x, m, M): - """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which - truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n + """Wraps `x` so m <= x <= M; but unlike `bound()` which + truncates, `wrap()` wraps x around the coordinate system defined by m,M.\n For example, m = -180, M = 180 (degrees), x = 360 --> returns 0. Args: @@ -439,7 +440,7 @@ def rk4(derivs, y0, t): >>> yout = rk4(derivs, y0, t) Args: - derivs: the derivative of the system and has the signature ``dy = derivs(yi)`` + derivs: the derivative of the system and has the signature `dy = derivs(yi)` y0: initial state vector t: sample times diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py index 6ef96efad..526426c13 100644 --- a/gymnasium/envs/classic_control/cartpole.py +++ b/gymnasium/envs/classic_control/cartpole.py @@ -74,29 +74,33 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): ## Arguments - Cartpole only has ``render_mode`` as a keyword for ``gymnasium.make``. + Cartpole only has `render_mode` as a keyword for `gymnasium.make`. On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. - Examples: - >>> import gymnasium as gym - >>> env = gym.make("CartPole-v1", render_mode="rgb_array") - >>> env - <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>> - >>> env.reset(seed=123, options={"low": 0, "high": 1}) - (array([0.6823519 , 0.05382102, 0.22035988, 0.18437181], dtype=float32), {}) + ```python + >>> import gymnasium as gym + >>> env = gym.make("CartPole-v1", render_mode="rgb_array") + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>> + >>> env.reset(seed=123, options={"low": -0.1, "high": 0.1}) # default low=-0.05, high=0.05 + (array([ 0.03647037, -0.0892358 , -0.05592803, -0.06312564], dtype=float32), {}) + + ``` ## Vectorized environment To increase steps per second, users can use a custom vector environment or an environment vectorizer.
- Examples: - >>> import gymnasium as gym - >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point") - >>> envs - CartPoleVectorEnv(CartPole-v1, num_envs=3) - >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync") - >>> envs - SyncVectorEnv(CartPole-v1, num_envs=3) + ```python + >>> import gymnasium as gym + >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point") + >>> envs + CartPoleVectorEnv(CartPole-v1, num_envs=3) + >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync") + >>> envs + SyncVectorEnv(CartPole-v1, num_envs=3) + + ``` """ metadata = { @@ -360,6 +364,7 @@ def __init__( self.kinematics_integrator = "euler" self.steps = np.zeros(num_envs, dtype=np.int32) + self.prev_done = np.zeros(num_envs, dtype=np.bool_) # Angle at which to fail the episode self.theta_threshold_radians = 12 * 2 * math.pi / 360 @@ -441,21 +446,23 @@ def step( truncated = self.steps >= self.max_episode_steps - done = terminated | truncated + reward = np.ones_like(terminated, dtype=np.float32) - if any(done): - # This code was generated by copilot, need to check if it works - self.state[:, done] = self.np_random.uniform( - low=self.low, high=self.high, size=(4, done.sum()) - ).astype(np.float32) - self.steps[done] = 0 + # Reset all environments which terminated or were truncated in the last step + self.state[:, self.prev_done] = self.np_random.uniform( + low=self.low, high=self.high, size=(4, self.prev_done.sum()) + ) + self.steps[self.prev_done] = 0 + reward[self.prev_done] = 0.0 + terminated[self.prev_done] = False + truncated[self.prev_done] = False - reward = np.ones_like(terminated, dtype=np.float32) + self.prev_done = terminated | truncated if self.render_mode == "human": self.render() - return self.state.T, reward, terminated, truncated, {} + return self.state.T.astype(np.float32), reward, terminated, truncated, {} def reset( self, @@ -471,12 +478,14 @@ def reset( ) # default high self.state = self.np_random.uniform( low=self.low, high=self.high, size=(4, self.num_envs) - ).astype(np.float32) + ) self.steps_beyond_terminated = None + self.steps = np.zeros(self.num_envs, dtype=np.int32) + self.prev_done = np.zeros(self.num_envs, dtype=np.bool_) if self.render_mode == "human": self.render() - return self.state.T, {} + return self.state.T.astype(np.float32), {} def render(self): if self.render_mode is None: diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py index 6397f7e97..f27577fed 100644 --- a/gymnasium/envs/classic_control/continuous_mountain_car.py +++ b/gymnasium/envs/classic_control/continuous_mountain_car.py @@ -91,17 +91,22 @@ class Continuous_MountainCarEnv(gym.Env): ## Arguments + Continuous Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`. + On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. + ```python - import gymnasium as gym - gym.make('MountainCarContinuous-v0') - ``` + >>> import gymnasium as gym + >>> env = gym.make("MountainCarContinuous-v0", render_mode="rgb_array", goal_velocity=0.1) # default goal_velocity=0 + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<Continuous_MountainCarEnv<MountainCarContinuous-v0>>>>> + >>> env.reset(seed=123, options={"low": -0.7, "high": -0.5}) # default low=-0.6, high=-0.4 + (array([-0.5635296, 0. ], dtype=float32), {}) - On reset, the `options` parameter allows the user to change the bounds used to determine - the new random state.
+ ``` ## Version History - * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py index dfc06070a..a6157377c 100644 --- a/gymnasium/envs/classic_control/mountain_car.py +++ b/gymnasium/envs/classic_control/mountain_car.py @@ -80,20 +80,24 @@ class MountainCarEnv(gym.Env): 1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill) 2. Truncation: The length of the episode is 200. - ## Arguments + Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`. + On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. + ```python - import gymnasium as gym - gym.make('MountainCar-v0') - ``` + >>> import gymnasium as gym + >>> env = gym.make("MountainCar-v0", render_mode="rgb_array", goal_velocity=0.1) # default goal_velocity=0 + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<MountainCarEnv<MountainCar-v0>>>>> + >>> env.reset(seed=123, options={"x_init": np.pi/2, "y_init": 0.5}) # default x_init=np.pi, y_init=1.0 + (array([-0.46352962, 0. ], dtype=float32), {}) - On reset, the `options` parameter allows the user to change the bounds used to determine - the new random state. + ``` ## Version History - * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py index 0c1516680..64866ecaa 100644 --- a/gymnasium/envs/classic_control/pendulum.py +++ b/gymnasium/envs/classic_control/pendulum.py @@ -29,7 +29,7 @@ class PendulumEnv(gym.Env): ![Pendulum Coordinate System](/_static/diagrams/pendulum.png) - - `x-y`: cartesian coordinates of the pendulum's end in meters. + - `x-y`: cartesian coordinates of the pendulum's end in meters. - `theta` : angle in radians. - `tau`: torque in `N m`. Defined as positive _counter-clockwise_. @@ -41,7 +41,6 @@ |-----|--------|------|-----| | 0 | Torque | -2.0 | 2.0 | - ## Observation Space The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free @@ -74,22 +73,27 @@ ## Arguments - - `g`: acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics. - The default value is g = 10.0 . + + Pendulum has two parameters for `gymnasium.make` with `render_mode` and `g` representing + the acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics. + The default value is `g = 10.0`. + On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. ```python - import gymnasium as gym - gym.make('Pendulum-v1', g=9.81) - ``` + >>> import gymnasium as gym + >>> env = gym.make("Pendulum-v1", render_mode="rgb_array", g=9.81) # default g=10.0 + >>> env + <TimeLimit<OrderEnforcing<PassiveEnvChecker<PendulumEnv<Pendulum-v1>>>>> + >>> env.reset(seed=123, options={"low": -0.7, "high": 0.5}) # default low=-0.6, high=-0.5 + (array([ 0.4123625 , 0.91101986, -0.89235795], dtype=float32), {}) - On reset, the `options` parameter allows the user to change the bounds used to determine - the new random state. + ``` ## Version History * v1: Simplify the math equations, no difference in behavior.
- * v0: Initial versions release (1.0.0) - + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/mujoco/__init__.py b/gymnasium/envs/mujoco/__init__.py index a8d029e60..b85325fa6 100644 --- a/gymnasium/envs/mujoco/__init__.py +++ b/gymnasium/envs/mujoco/__init__.py @@ -1,2 +1,2 @@ -from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_env import MujocoEnv from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer diff --git a/gymnasium/envs/mujoco/ant.py b/gymnasium/envs/mujoco/ant.py index 3cccf8cb2..1e366a123 100644 --- a/gymnasium/envs/mujoco/ant.py +++ b/gymnasium/envs/mujoco/ant.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/ant_v3.py b/gymnasium/envs/mujoco/ant_v3.py index ebf5fa000..0fdc11851 100644 --- a/gymnasium/envs/mujoco/ant_v3.py +++ b/gymnasium/envs/mujoco/ant_v3.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/ant_v5.py b/gymnasium/envs/mujoco/ant_v5.py index 768cd5f9f..abe0db87c 100644 --- a/gymnasium/envs/mujoco/ant_v5.py +++ b/gymnasium/envs/mujoco/ant_v5.py @@ -21,6 +21,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): The ant is a 3D quadruped robot consisting of a torso (free rotational body) with four legs attached to it, where each leg has two body parts. The goal is to coordinate the four legs to move in the forward (right) direction by applying torque to the eight hinges connecting the two body parts of each leg and the torso (nine body parts and eight hinges). + Note: Although the robot is called "Ant", it is actually 75cm tall and weighs 910.88g, with the torso being 327.25g and each leg being 145.91g. ## Action Space ```{figure} action_space_figures/ant.png @@ -157,14 +158,14 @@ class AntEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination If `terminate_when_unhealthy is True` (the default), the environment terminates when the Ant is unhealthy. the Ant is unhealthy if any of the following happens: 1. Any of the state space values is no longer finite. 2. The z-coordinate of the torso (the height) is **not** in the closed interval given by the `healthy_z_range` argument (default is $[0.2, 1.0]$). - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -215,7 +216,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. 
- * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/mujoco/half_cheetah.py b/gymnasium/envs/mujoco/half_cheetah.py index fd3eca0e1..39af332d0 100644 --- a/gymnasium/envs/mujoco/half_cheetah.py +++ b/gymnasium/envs/mujoco/half_cheetah.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/half_cheetah_v3.py b/gymnasium/envs/mujoco/half_cheetah_v3.py index 4c5e1d3d8..7bd797ac5 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v3.py +++ b/gymnasium/envs/mujoco/half_cheetah_v3.py @@ -3,7 +3,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/half_cheetah_v5.py b/gymnasium/envs/mujoco/half_cheetah_v5.py index 6f9aba173..2de031c19 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v5.py +++ b/gymnasium/envs/mujoco/half_cheetah_v5.py @@ -53,26 +53,27 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): By default, however, the observation space is a `Box(-Inf, Inf, (17,), float64)` where the elements are as follows: - | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Type (Unit) | - | --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ | - | 0 | z-coordinate of the front tip | -Inf | Inf | rootz | slide | position (m) | - | 1 | angle of the front tip | -Inf | Inf | rooty | hinge | angle (rad) | - | 2 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angle (rad) | - | 3 | angle of the second rotor | -Inf | Inf | bshin | hinge | angle (rad) | - | 4 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angle (rad) | - | 5 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angle (rad) | - | 6 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angle (rad) | - | 7 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angle (rad) | - | 8 | x-coordinate of the front tip | -Inf | Inf | rootx | slide | velocity (m/s) | - | 9 | y-coordinate of the front tip | -Inf | Inf | rootz | slide | velocity (m/s) | - | 10 | angle of the front tip | -Inf | Inf | rooty | hinge | angular velocity (rad/s) | - | 11 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angular velocity (rad/s) | - | 12 | angle of the second rotor | -Inf | Inf | bshin | hinge | angular velocity (rad/s) | - | 13 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angular velocity (rad/s) | - | 14 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angular velocity (rad/s) | - | 15 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angular velocity (rad/s) | - | 16 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angular velocity (rad/s) | - | excluded | x-coordinate of the front tip | -Inf | Inf | rootx | slide | position (m) | + + | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Type (Unit) | + | --- | ------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ | + | 0 | z-coordinate of the front tip | -Inf | Inf | rootz | slide | position (m) | + | 1 | angle of the front tip | -Inf | Inf | 
rooty | hinge | angle (rad) | + | 2 | angle of the back thigh | -Inf | Inf | bthigh | hinge | angle (rad) | + | 3 | angle of the back shin | -Inf | Inf | bshin | hinge | angle (rad) | + | 4 | angle of the back foot | -Inf | Inf | bfoot | hinge | angle (rad) | + | 5 | angle of the front thigh | -Inf | Inf | fthigh | hinge | angle (rad) | + | 6 | angle of the front shin | -Inf | Inf | fshin | hinge | angle (rad) | + | 7 | angle of the front foot | -Inf | Inf | ffoot | hinge | angle (rad) | + | 8 | velocity of the x-coordinate of front tip | -Inf | Inf | rootx | slide | velocity (m/s) | + | 9 | velocity of the z-coordinate of front tip | -Inf | Inf | rootz | slide | velocity (m/s) | + | 10 | angular velocity of the front tip | -Inf | Inf | rooty | hinge | angular velocity (rad/s) | + | 11 | angular velocity of the back thigh | -Inf | Inf | bthigh | hinge | angular velocity (rad/s) | + | 12 | angular velocity of the back shin | -Inf | Inf | bshin | hinge | angular velocity (rad/s) | + | 13 | angular velocity of the back foot | -Inf | Inf | bfoot | hinge | angular velocity (rad/s) | + | 14 | angular velocity of the front thigh | -Inf | Inf | fthigh | hinge | angular velocity (rad/s) | + | 15 | angular velocity of the front shin | -Inf | Inf | fshin | hinge | angular velocity (rad/s) | + | 16 | angular velocity of the front foot | -Inf | Inf | ffoot | hinge | angular velocity (rad/s) | + | excluded | x-coordinate of the front tip | -Inf | Inf | rootx | slide | position (m) | ## Rewards @@ -102,10 +103,10 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The Half Cheetah never terminates. - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -140,7 +141,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. """ metadata = { diff --git a/gymnasium/envs/mujoco/hopper.py b/gymnasium/envs/mujoco/hopper.py index 1385769d0..7817e80a3 100644 --- a/gymnasium/envs/mujoco/hopper.py +++ b/gymnasium/envs/mujoco/hopper.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/hopper_v3.py b/gymnasium/envs/mujoco/hopper_v3.py index 05d20c90c..c26e357f8 100644 --- a/gymnasium/envs/mujoco/hopper_v3.py +++ b/gymnasium/envs/mujoco/hopper_v3.py @@ -3,7 +3,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/hopper_v5.py b/gymnasium/envs/mujoco/hopper_v5.py index fe8fede85..54a4adf23 100644 --- a/gymnasium/envs/mujoco/hopper_v5.py +++ b/gymnasium/envs/mujoco/hopper_v5.py @@ -101,7 +101,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination If `terminate_when_unhealthy is True` (the default), the environment terminates when the Hopper is unhealthy. 
The Hopper is unhealthy if any of the following happens: @@ -109,7 +109,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): 2. The height of the hopper (`observation[0]` if `exclude_current_positions_from_observation=True`, otherwise `observation[1]`) is no longer contained in the closed interval specified by the `healthy_z_range` argument (default is $[0.7, +\infty]$) (usually meaning that it has fallen). 3. The angle of the torso (`observation[1]` if `exclude_current_positions_from_observation=True`, otherwise `observation[2]`) is no longer contained in the closed interval specified by the `healthy_angle_range` argument (default is $[-0.2, 0.2]$). - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -151,7 +151,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. """ metadata = { diff --git a/gymnasium/envs/mujoco/humanoid.py b/gymnasium/envs/mujoco/humanoid.py index 15be9b636..0f626039c 100644 --- a/gymnasium/envs/mujoco/humanoid.py +++ b/gymnasium/envs/mujoco/humanoid.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/humanoid_v3.py b/gymnasium/envs/mujoco/humanoid_v3.py index 35e8e28f5..7be2fd850 100644 --- a/gymnasium/envs/mujoco/humanoid_v3.py +++ b/gymnasium/envs/mujoco/humanoid_v3.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/humanoid_v5.py b/gymnasium/envs/mujoco/humanoid_v5.py index bd7d9f3b3..b6b324b47 100644 --- a/gymnasium/envs/mujoco/humanoid_v5.py +++ b/gymnasium/envs/mujoco/humanoid_v5.py @@ -233,13 +233,13 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination If `terminate_when_unhealthy is True` (the default), the environment terminates when the Humanoid is unhealthy. The Humanoid is said to be unhealthy if any of the following happens: 1. The z-coordinate of the torso (the height) is **not** in the closed interval given by the `healthy_z_range` argument (default is $[1.0, 2.0]$). - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -290,9 +290,11 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): - Removed `info["forward_reward"]` as it is equivalent to `info["reward_forward"]`. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3 * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. 
rgb rendering comes from tracking camera (so agent does not run away from screen) + - Note: the environment robot model was slightly changed at `gym==0.21.0` and training results are not comparable with `gym<0.21` and `gym>=0.21` (related [GitHub PR](https://github.com/openai/gym/pull/932/files)) * v2: All continuous control environments now use mujoco-py >= 1.50 + - Note: the environment robot model was slightly changed at `gym==0.21.0` and training results are not comparable with `gym<0.21` and `gym>=0.21` (related [GitHub PR](https://github.com/openai/gym/pull/932/files)) * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { @@ -404,7 +406,7 @@ def __init__( "qfrc_actuator": (self.data.qvel.size - 6) * include_qfrc_actuator_in_observation, "cfrc_ext": self.data.cfrc_ext[1:].size * include_cfrc_ext_in_observation, - "ten_lenght": 0, + "ten_length": 0, "ten_velocity": 0, } @@ -481,7 +483,7 @@ def step(self, action): info = { "x_position": self.data.qpos[0], "y_position": self.data.qpos[1], - "tendon_lenght": self.data.ten_length, + "tendon_length": self.data.ten_length, "tendon_velocity": self.data.ten_velocity, "distance_from_origin": np.linalg.norm(self.data.qpos[0:2], ord=2), "x_velocity": x_velocity, @@ -533,7 +535,7 @@ def _get_reset_info(self): return { "x_position": self.data.qpos[0], "y_position": self.data.qpos[1], - "tendon_lenght": self.data.ten_length, + "tendon_length": self.data.ten_length, "tendon_velocity": self.data.ten_velocity, "distance_from_origin": np.linalg.norm(self.data.qpos[0:2], ord=2), } diff --git a/gymnasium/envs/mujoco/humanoidstandup.py b/gymnasium/envs/mujoco/humanoidstandup.py index 9a0cd17f9..1f84a26e6 100644 --- a/gymnasium/envs/mujoco/humanoidstandup.py +++ b/gymnasium/envs/mujoco/humanoidstandup.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py index 9632a24d7..8477ec0ca 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v5.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py @@ -223,10 +223,10 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The Humanoid never terminates. - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -274,7 +274,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): * v3: This environment does not have a v3 release. * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. 
""" metadata = { @@ -382,7 +382,7 @@ def __init__( "qfrc_actuator": (self.data.qvel.size - 6) * include_qfrc_actuator_in_observation, "cfrc_ext": self.data.cfrc_ext[1:].size * include_cfrc_ext_in_observation, - "ten_lenght": 0, + "ten_length": 0, "ten_velocity": 0, } @@ -431,7 +431,7 @@ def step(self, action): "x_position": self.data.qpos[0], "y_position": self.data.qpos[1], "z_distance_from_origin": self.data.qpos[2] - self.init_qpos[2], - "tendon_lenght": self.data.ten_length, + "tendon_length": self.data.ten_length, "tendon_velocity": self.data.ten_velocity, **reward_info, } @@ -482,6 +482,6 @@ def _get_reset_info(self): "x_position": self.data.qpos[0], "y_position": self.data.qpos[1], "z_distance_from_origin": self.data.qpos[2] - self.init_qpos[2], - "tendon_lenght": self.data.ten_length, + "tendon_length": self.data.ten_length, "tendon_velocity": self.data.ten_velocity, } diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum.py b/gymnasium/envs/mujoco/inverted_double_pendulum.py index 48b058447..c67ea9e33 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py index 1981f692e..07b44dae2 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py @@ -91,7 +91,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The environment terminates when the Inverted Double Pendulum is unhealthy. The Inverted Double Pendulum is unhealthy if any of the following happens: @@ -99,7 +99,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): Note: The maximum standing height of the system is 1.2 m when all the parts are perpendicularly vertical on top of each other. - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -133,7 +133,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): * v3: This environment does not have a v3 release. * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum). - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. """ metadata = { diff --git a/gymnasium/envs/mujoco/inverted_pendulum.py b/gymnasium/envs/mujoco/inverted_pendulum.py index 10d309810..3d2cf9768 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum.py +++ b/gymnasium/envs/mujoco/inverted_pendulum.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_pendulum_v5.py index 02916001f..8e2433dc4 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_pendulum_v5.py @@ -68,14 +68,14 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The environment terminates when the Inverted Pendulum is unhealthy. The Inverted Pendulum is unhealthy if any of the following happens: 1. 
Any of the state space values is no longer finite. 2. The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radians. - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -108,7 +108,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): * v3: This environment does not have a v3 release. * v2: All continuous control environments now use mujoco-py >= 1.5. * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum). - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. """ metadata = { diff --git a/gymnasium/envs/mujoco/mujoco_env.py b/gymnasium/envs/mujoco/mujoco_env.py index 6d1e39339..4a0b3c056 100644 --- a/gymnasium/envs/mujoco/mujoco_env.py +++ b/gymnasium/envs/mujoco/mujoco_env.py @@ -5,23 +5,17 @@ from numpy.typing import NDArray import gymnasium as gym -from gymnasium import error, logger, spaces +from gymnasium import error, spaces from gymnasium.spaces import Space -try: - import mujoco_py -except ImportError as e: - MUJOCO_PY_IMPORT_ERROR = e -else: - MUJOCO_PY_IMPORT_ERROR = None - try: import mujoco except ImportError as e: - MUJOCO_IMPORT_ERROR = e -else: - MUJOCO_IMPORT_ERROR = None + raise error.DependencyNotInstalled( + "Could not import mujoco. " + "(HINT: you need to install mujoco, run `pip install gymnasium[mujoco]`.)" + ) from e DEFAULT_SIZE = 480 @@ -200,153 +194,6 @@ def state_vector(self) -> NDArray[np.float64]: return np.concatenate([self.data.qpos.flat, self.data.qvel.flat]) -class MuJocoPyEnv(BaseMujocoEnv): - def __init__( - self, - model_path: str, - frame_skip: int, - observation_space: Space, - render_mode: Optional[str] = None, - width: int = DEFAULT_SIZE, - height: int = DEFAULT_SIZE, - camera_id: Optional[int] = None, - camera_name: Optional[str] = None, - ): - if MUJOCO_PY_IMPORT_ERROR is not None: - raise error.DependencyNotInstalled( - f"{MUJOCO_PY_IMPORT_ERROR}. " - "(HINT: you need to install mujoco-py, and also perform the setup instructions " - "here: https://github.com/openai/mujoco-py.)" - ) - - logger.deprecation( - "This version of the mujoco environments depends " - "on the mujoco-py bindings, which are no longer maintained " - "and may stop working. Please upgrade to the v5 or v4 versions of " - "the environments (which depend on the mujoco python bindings instead), unless " - "you are trying to precisely replicate previous works)." - ) - - self.viewer = None - self._viewers = {} - - super().__init__( - model_path, - frame_skip, - observation_space, - render_mode, - width, - height, - camera_id, - camera_name, - ) - - def _initialize_simulation(self): - model = mujoco_py.load_model_from_path(self.fullpath) - self.sim = mujoco_py.MjSim(model) - data = self.sim.data - return model, data - - def _reset_simulation(self): - self.sim.reset() - - def set_state(self, qpos, qvel): - super().set_state(qpos, qvel) - state = self.sim.get_state() - state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state) - self.sim.set_state(state) - self.sim.forward() - - def get_body_com(self, body_name): - return self.data.get_body_xpos(body_name) - - def _step_mujoco_simulation(self, ctrl, n_frames): - self.sim.data.ctrl[:] = ctrl - - for _ in range(n_frames): - self.sim.step() - - def render(self): - if self.render_mode is None: - assert self.spec is not None - gym.logger.warn( - "You are calling render method without specifying any render mode. " - "You can specify the render_mode at initialization, " - f'e.g.
gym.make("{self.spec.id}", render_mode="rgb_array")' ) - return - - width, height = self.width, self.height - camera_name, camera_id = self.camera_name, self.camera_id - if self.render_mode in {"rgb_array", "depth_array"}: - if camera_id is not None and camera_name is not None: - raise ValueError( - "Both `camera_id` and `camera_name` cannot be" - " specified at the same time." - ) - - no_camera_specified = camera_name is None and camera_id is None - if no_camera_specified: - camera_name = "track" - - if camera_id is None and camera_name in self.model._camera_name2id: - if camera_name in self.model._camera_name2id: - camera_id = self.model.camera_name2id(camera_name) - - self._get_viewer(self.render_mode).render( - width, height, camera_id=camera_id - ) - - if self.render_mode == "rgb_array": - data = self._get_viewer(self.render_mode).read_pixels( - width, height, depth=False - ) - # original image is upside-down, so flip it - return data[::-1, :, :] - elif self.render_mode == "depth_array": - self._get_viewer(self.render_mode).render(width, height) - # Extract depth part of the read_pixels() tuple - data = self._get_viewer(self.render_mode).read_pixels( - width, height, depth=True - )[1] - # original image is upside-down, so flip it - return data[::-1, :] - elif self.render_mode == "human": - self._get_viewer(self.render_mode).render() - - def _get_viewer( - self, mode - ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]: - self.viewer = self._viewers.get(mode) - if self.viewer is None: - if mode == "human": - self.viewer = mujoco_py.MjViewer(self.sim) - - elif mode in {"rgb_array", "depth_array"}: - self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) - else: - raise AttributeError( - f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}" - ) - - self.viewer_setup() - self._viewers[mode] = self.viewer - - return self.viewer - - def close(self): - if self.viewer is not None: - self.viewer = None - self._viewers = {} - - def viewer_setup(self): - """ - This method is called when the viewer is initialized. - Optionally implement this method, if you need to tinker with camera position and so forth. - """ - raise NotImplementedError - - class MujocoEnv(BaseMujocoEnv): """Superclass for MuJoCo environments.""" @@ -363,12 +210,6 @@ def __init__( default_camera_config: Optional[Dict[str, Union[float, int]]] = None, max_geom: int = 1000, ): - if MUJOCO_IMPORT_ERROR is not None: - raise error.DependencyNotInstalled( - f"{MUJOCO_IMPORT_ERROR}.
" - "(HINT: you need to install mujoco, run `pip install gymnasium[mujoco]`.)" - ) - super().__init__( model_path, frame_skip, diff --git a/gymnasium/envs/mujoco/mujoco_py_env.py b/gymnasium/envs/mujoco/mujoco_py_env.py new file mode 100644 index 000000000..8d2949102 --- /dev/null +++ b/gymnasium/envs/mujoco/mujoco_py_env.py @@ -0,0 +1,342 @@ +from os import path +from typing import Any, Dict, Optional, Tuple, Union + +import numpy as np +from numpy.typing import NDArray + +import gymnasium as gym +from gymnasium import error, logger, spaces +from gymnasium.spaces import Space + + +try: + import mujoco_py +except ImportError as e: + raise error.DependencyNotInstalled( + "Could not import mujoco_py, which is needed for MuJoCo environments older than V4. " + "You could either use a newer version of the environments, or install the (deprecated) mujoco-py package " + "following the instructions on their GitHub page." + ) from e + + +# NOTE: duplication of analogous code in mujoco_env.py +# Support for mujoco-py based envs is deprecated, so this module will no longer be maintained +# The code is duplicated instead of imported so that it works standalone, independent +# of the further development of the maintained mujoco_env.py +# noinspection DuplicatedCode +DEFAULT_SIZE = 480 + + +# noinspection DuplicatedCode +def expand_model_path(model_path: str) -> str: + """Expands the `model_path` to a full path if it starts with '~' or '.' or '/'.""" + if model_path.startswith(".") or model_path.startswith("/"): + fullpath = model_path + elif model_path.startswith("~"): + fullpath = path.expanduser(model_path) + else: + fullpath = path.join(path.dirname(__file__), "assets", model_path) + if not path.exists(fullpath): + raise OSError(f"File {fullpath} does not exist") + + return fullpath + + +# noinspection DuplicatedCode +class BaseMujocoPyEnv(gym.Env[NDArray[np.float64], NDArray[np.float32]]): + """Superclass for all MuJoCo environments.""" + + def __init__( + self, + model_path, + frame_skip, + observation_space: Optional[Space], + render_mode: Optional[str] = None, + width: int = DEFAULT_SIZE, + height: int = DEFAULT_SIZE, + camera_id: Optional[int] = None, + camera_name: Optional[str] = None, + ): + """Base abstract class for mujoco based environments. + + Args: + model_path: Path to the MuJoCo Model. + frame_skip: Number of MuJoCo simulation steps per gym `step()`. + observation_space: The observation space of the environment. + render_mode: The `render_mode` used. + width: The width of the render window. + height: The height of the render window. + camera_id: The camera ID used. + camera_name: The name of the camera used (can not be used in conjunction with `camera_id`). + + Raises: + OSError: when the `model_path` does not exist. + error.DependencyNotInstalled: When `mujoco-py` is not installed.
+ """ + self.fullpath = expand_model_path(model_path) + + self.width = width + self.height = height + # may use width and height + self.model, self.data = self._initialize_simulation() + + self.init_qpos = self.data.qpos.ravel().copy() + self.init_qvel = self.data.qvel.ravel().copy() + + self.frame_skip = frame_skip + + assert self.metadata["render_modes"] == [ + "human", + "rgb_array", + "depth_array", + ], self.metadata["render_modes"] + if "render_fps" in self.metadata: + assert ( + int(np.round(1.0 / self.dt)) == self.metadata["render_fps"] + ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' + if observation_space is not None: + self.observation_space = observation_space + self._set_action_space() + + self.render_mode = render_mode + self.camera_name = camera_name + self.camera_id = camera_id + + def _set_action_space(self): + bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) + low, high = bounds.T + self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) + return self.action_space + + # methods to override: + # ---------------------------- + def step( + self, action: NDArray[np.float32] + ) -> Tuple[NDArray[np.float64], np.float64, bool, bool, Dict[str, np.float64]]: + raise NotImplementedError + + def reset_model(self) -> NDArray[np.float64]: + """ + Reset the robot degrees of freedom (qpos and qvel). + Implement this in each subclass. + """ + raise NotImplementedError + + def _initialize_simulation(self) -> Tuple[Any, Any]: + """ + Initialize MuJoCo simulation data structures mjModel and mjData. + """ + raise NotImplementedError + + def _reset_simulation(self) -> None: + """ + Reset MuJoCo simulation data structures, mjModel and mjData. + """ + raise NotImplementedError + + def _step_mujoco_simulation(self, ctrl, n_frames) -> None: + """ + Step over the MuJoCo simulation. + """ + raise NotImplementedError + + def render(self) -> Union[NDArray[np.float64], None]: + """ + Render a frame from the MuJoCo simulation as specified by the render_mode. + """ + raise NotImplementedError + + # ----------------------------- + def _get_reset_info(self) -> Dict[str, float]: + """Function that generates the `info` that is returned during a `reset()`.""" + return {} + + def reset( + self, + *, + seed: Optional[int] = None, + options: Optional[dict] = None, + ): + super().reset(seed=seed) + + self._reset_simulation() + + ob = self.reset_model() + info = self._get_reset_info() + + if self.render_mode == "human": + self.render() + return ob, info + + def set_state(self, qpos, qvel) -> None: + """ + Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used. + """ + assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) + + @property + def dt(self) -> float: + return self.model.opt.timestep * self.frame_skip + + def do_simulation(self, ctrl, n_frames) -> None: + """ + Step the simulation n number of frames and applying a control action. + """ + # Check control input is contained in the action space + if np.array(ctrl).shape != (self.model.nu,): + raise ValueError( + f"Action dimension mismatch. 
Expected {(self.model.nu,)}, found {np.array(ctrl).shape}" + ) + self._step_mujoco_simulation(ctrl, n_frames) + + def close(self): + """Close all processes like rendering contexts""" + raise NotImplementedError + + def get_body_com(self, body_name) -> NDArray[np.float64]: + """Return the cartesian position of a body frame""" + raise NotImplementedError + + def state_vector(self) -> NDArray[np.float64]: + """Return the position and velocity joint states of the model""" + return np.concatenate([self.data.qpos.flat, self.data.qvel.flat]) + + +class MuJocoPyEnv(BaseMujocoPyEnv): + def __init__( + self, + model_path: str, + frame_skip: int, + observation_space: Space, + render_mode: Optional[str] = None, + width: int = DEFAULT_SIZE, + height: int = DEFAULT_SIZE, + camera_id: Optional[int] = None, + camera_name: Optional[str] = None, + ): + logger.deprecation( + "This version of the mujoco environments depends " + "on the mujoco-py bindings, which are no longer maintained " + "and may stop working. Please upgrade to the v5 or v4 versions of " + "the environments (which depend on the mujoco python bindings instead), unless " + "you are trying to precisely replicate previous works)." + ) + + self.viewer = None + self._viewers = {} + + super().__init__( + model_path, + frame_skip, + observation_space, + render_mode, + width, + height, + camera_id, + camera_name, + ) + + def _initialize_simulation(self): + model = mujoco_py.load_model_from_path(self.fullpath) + self.sim = mujoco_py.MjSim(model) + data = self.sim.data + return model, data + + def _reset_simulation(self): + self.sim.reset() + + def set_state(self, qpos, qvel): + super().set_state(qpos, qvel) + state = self.sim.get_state() + state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state) + self.sim.set_state(state) + self.sim.forward() + + def get_body_com(self, body_name): + return self.data.get_body_xpos(body_name) + + def _step_mujoco_simulation(self, ctrl, n_frames): + self.sim.data.ctrl[:] = ctrl + + for _ in range(n_frames): + self.sim.step() + + def render(self): + if self.render_mode is None: + assert self.spec is not None + gym.logger.warn( + "You are calling render method without specifying any render mode. " + "You can specify the render_mode at initialization, " + f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")' + ) + return + + width, height = self.width, self.height + camera_name, camera_id = self.camera_name, self.camera_id + if self.render_mode in {"rgb_array", "depth_array"}: + if camera_id is not None and camera_name is not None: + raise ValueError( + "Both `camera_id` and `camera_name` cannot be" + " specified at the same time." 
+ ) + + no_camera_specified = camera_name is None and camera_id is None + if no_camera_specified: + camera_name = "track" + + if camera_id is None and camera_name in self.model._camera_name2id: + if camera_name in self.model._camera_name2id: + camera_id = self.model.camera_name2id(camera_name) + + self._get_viewer(self.render_mode).render( + width, height, camera_id=camera_id + ) + + if self.render_mode == "rgb_array": + data = self._get_viewer(self.render_mode).read_pixels( + width, height, depth=False + ) + # original image is upside-down, so flip it + return data[::-1, :, :] + elif self.render_mode == "depth_array": + self._get_viewer(self.render_mode).render(width, height) + # Extract depth part of the read_pixels() tuple + data = self._get_viewer(self.render_mode).read_pixels( + width, height, depth=True + )[1] + # original image is upside-down, so flip it + return data[::-1, :] + elif self.render_mode == "human": + self._get_viewer(self.render_mode).render() + + def _get_viewer( + self, mode + ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]: + self.viewer = self._viewers.get(mode) + if self.viewer is None: + if mode == "human": + self.viewer = mujoco_py.MjViewer(self.sim) + + elif mode in {"rgb_array", "depth_array"}: + self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) + else: + raise AttributeError( + f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}" + ) + + self.viewer_setup() + self._viewers[mode] = self.viewer + + return self.viewer + + def close(self): + if self.viewer is not None: + self.viewer = None + self._viewers = {} + + def viewer_setup(self): + """ + This method is called when the viewer is initialized. + Optionally implement this method, if you need to tinker with camera position and so forth. + """ + raise NotImplementedError diff --git a/gymnasium/envs/mujoco/pusher.py b/gymnasium/envs/mujoco/pusher.py index 8fcb8f418..a736b4aa2 100644 --- a/gymnasium/envs/mujoco/pusher.py +++ b/gymnasium/envs/mujoco/pusher.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py index 2fb8f4d8b..c9082e326 100644 --- a/gymnasium/envs/mujoco/pusher_v5.py +++ b/gymnasium/envs/mujoco/pusher_v5.py @@ -117,10 +117,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The Pusher never terminates. - #### Truncation + ### Truncation The default duration of an episode is 100 timesteps. @@ -154,7 +154,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): * v3: This environment does not have a v3 release. * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks (not including pusher, which has a max_time_steps of 100). Added reward_threshold to environments. - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. 
""" metadata = { diff --git a/gymnasium/envs/mujoco/reacher.py b/gymnasium/envs/mujoco/reacher.py index 73034789a..21cf7b392 100644 --- a/gymnasium/envs/mujoco/reacher.py +++ b/gymnasium/envs/mujoco/reacher.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index a586ff2e1..0acf55536 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -97,10 +97,10 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The Reacher never terminates. - #### Truncation + ### Truncation The default duration of an episode is 50 timesteps. @@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): * v3: This environment does not have a v3 release. * v2: All continuous control environments now use mujoco-py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments. - * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/mujoco/swimmer.py b/gymnasium/envs/mujoco/swimmer.py index 19f8f562e..05769b967 100644 --- a/gymnasium/envs/mujoco/swimmer.py +++ b/gymnasium/envs/mujoco/swimmer.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/swimmer_v3.py b/gymnasium/envs/mujoco/swimmer_v3.py index a5f6b7d29..90cb37093 100644 --- a/gymnasium/envs/mujoco/swimmer_v3.py +++ b/gymnasium/envs/mujoco/swimmer_v3.py @@ -3,7 +3,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/swimmer_v5.py b/gymnasium/envs/mujoco/swimmer_v5.py index 4515c5012..05395a35c 100644 --- a/gymnasium/envs/mujoco/swimmer_v5.py +++ b/gymnasium/envs/mujoco/swimmer_v5.py @@ -98,10 +98,10 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination The Swimmer never terminates. - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -140,7 +140,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). * v2: All continuous control environments now use mujoco-py >= 1.50. * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0). + * v0: Initial versions release. """ metadata = { diff --git a/gymnasium/envs/mujoco/utils.py b/gymnasium/envs/mujoco/utils.py new file mode 100644 index 000000000..54e50fa1a --- /dev/null +++ b/gymnasium/envs/mujoco/utils.py @@ -0,0 +1,64 @@ +"""A set of MujocoEnv related utilities, mainly for testing purposes. 
+ +Author: @Kallinteris-Andreas +""" + +import mujoco +import numpy as np + +import gymnasium + + +def get_state( + env: gymnasium.envs.mujoco.MujocoEnv, + state_type: mujoco.mjtState = mujoco.mjtState.mjSTATE_PHYSICS, +): + """Gets the state of `env`. + + Arguments: + env: Environment whose state to copy, `env.model` & `env.data` must be accessible. + state_type: see the [documentation of mjtState](https://mujoco.readthedocs.io/en/stable/APIreference/APItypes.html#mjtstate); most users can use the default for training purposes or `mujoco.mjtState.mjSTATE_INTEGRATION` for validation purposes. + """ + assert mujoco.__version__ >= "2.3.6", "Feature requires `mujoco>=2.3.6`" + + state = np.empty(mujoco.mj_stateSize(env.unwrapped.model, state_type)) + mujoco.mj_getState(env.unwrapped.model, env.unwrapped.data, state, state_type) + return state + + +def set_state( + env: gymnasium.envs.mujoco.MujocoEnv, + state: np.ndarray, + state_type: mujoco.mjtState = mujoco.mjtState.mjSTATE_PHYSICS, +): + """Set the state of `env`. + + Arguments: + env: Environment whose state to set, `env.model` & `env.data` must be accessible. + state: State to set (generated from get_state). + state_type: see the [documentation of mjtState](https://mujoco.readthedocs.io/en/stable/APIreference/APItypes.html#mjtstate); most users can use the default for training purposes or `mujoco.mjtState.mjSTATE_INTEGRATION` for validation purposes. + """ + assert mujoco.__version__ >= "2.3.6", "Feature requires `mujoco>=2.3.6`" + + mujoco.mj_setState( + env.unwrapped.model, + env.unwrapped.data, + state, + spec=state_type, + ) + return state + + +def check_mujoco_reset_state(env: gymnasium.envs.mujoco.MujocoEnv, seed=1234): + """Asserts that `env.reset` properly resets the state (not affected by previous steps), assuming `check_reset_seed` has passed.""" + env.action_space.seed(seed) + action = env.action_space.sample() + + env.reset(seed=seed) + first_reset_state = get_state(env, mujoco.mjtState.mjSTATE_INTEGRATION) + env.step(action) + + env.reset(seed=seed) + second_reset_state = get_state(env, mujoco.mjtState.mjSTATE_INTEGRATION) + + assert np.all(first_reset_state == second_reset_state), "reset is not deterministic" diff --git a/gymnasium/envs/mujoco/walker2d.py b/gymnasium/envs/mujoco/walker2d.py index 52010981b..e46dc49c1 100644 --- a/gymnasium/envs/mujoco/walker2d.py +++ b/gymnasium/envs/mujoco/walker2d.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/walker2d_v3.py b/gymnasium/envs/mujoco/walker2d_v3.py index 13f74beca..99b8ae48a 100644 --- a/gymnasium/envs/mujoco/walker2d_v3.py +++ b/gymnasium/envs/mujoco/walker2d_v3.py @@ -1,7 +1,7 @@ import numpy as np from gymnasium import utils -from gymnasium.envs.mujoco import MuJocoPyEnv +from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv from gymnasium.spaces import Box diff --git a/gymnasium/envs/mujoco/walker2d_v5.py b/gymnasium/envs/mujoco/walker2d_v5.py index f4f57ef1e..c9b22517f 100644 --- a/gymnasium/envs/mujoco/walker2d_v5.py +++ b/gymnasium/envs/mujoco/walker2d_v5.py @@ -109,7 +109,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): ## Episode End - #### Termination + ### Termination If `terminate_when_unhealthy is True` (which is the default), the environment terminates when the Walker2d is unhealthy.
The Walker2d is unhealthy if any of the following happens: @@ -117,7 +117,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): 2. The z-coordinate of the torso (the height) is **not** in the closed interval given by the `healthy_z_range` argument (default to $[0.8, 1.0]$). 3. The absolute value of the angle (`observation[1]` if `exclude_current_positions_from_observation=False`, else `observation[2]`) is ***not*** in the closed interval specified by the `healthy_angle_range` argument (default is $[-1, 1]$). - #### Truncation + ### Truncation The default duration of an episode is 1000 timesteps. @@ -160,7 +160,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco-py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. - * v0: Initial versions release (1.0.0) + * v0: Initial versions release """ metadata = { diff --git a/gymnasium/envs/registration.py b/gymnasium/envs/registration.py index 1774d9b11..f8db9139c 100644 --- a/gymnasium/envs/registration.py +++ b/gymnasium/envs/registration.py @@ -915,6 +915,15 @@ def create_single_env() -> Env: ) elif vectorization_mode == VectorizeMode.VECTOR_ENTRY_POINT: + if len(vector_kwargs) > 0: + raise error.Error( + f"Custom vector environments only accept arguments through kwargs, but `vector_kwargs` is not empty ({vector_kwargs})" + ) + if len(wrappers) > 0: + raise error.Error( + "Cannot use `vector_entry_point` vectorization mode with the wrappers argument." + ) + entry_point = env_spec.vector_entry_point if entry_point is None: raise error.Error( @@ -925,15 +934,13 @@ else: # Assume it's a string env_creator = load_env_creator(entry_point) - if len(wrappers) > 0: - raise error.Error( - "Cannot use `vector_entry_point` vectorization mode with the wrappers argument." - ) - if "max_episode_steps" not in vector_kwargs: - assert vector_kwargs is not None - vector_kwargs["max_episode_steps"] = env_spec.max_episode_steps + if ( + env_spec.max_episode_steps is not None + and "max_episode_steps" not in env_spec_kwargs + ): + env_spec_kwargs["max_episode_steps"] = env_spec.max_episode_steps - env = env_creator(num_envs=num_envs, **vector_kwargs) + env = env_creator(num_envs=num_envs, **env_spec_kwargs) else: raise error.Error(f"Unknown vectorization mode: {vectorization_mode}") diff --git a/gymnasium/envs/toy_text/blackjack.py b/gymnasium/envs/toy_text/blackjack.py index 2792bd77f..7fed3533e 100644 --- a/gymnasium/envs/toy_text/blackjack.py +++ b/gymnasium/envs/toy_text/blackjack.py @@ -90,13 +90,13 @@ class BlackjackEnv(gym.Env): The observation is returned as `(int(), int(), int())`. ## Starting State - The starting state is initialised in the following range. + The starting state is initialised with the following values.
- | Observation | Min | Max | - |---------------------------|------|------| - | Player current sum | 4 | 12 | - | Dealer showing card value | 2 | 11 | - | Usable Ace | 0 | 1 | + | Observation | Values | + |---------------------------|----------------| + | Player current sum | 4, 5, ..., 21 | + | Dealer showing card value | 1, 2, ..., 10 | + | Usable Ace | 0, 1 | ## Rewards - win game: +1 diff --git a/gymnasium/spaces/multi_discrete.py b/gymnasium/spaces/multi_discrete.py index 09f930ff8..caf488764 100644 --- a/gymnasium/spaces/multi_discrete.py +++ b/gymnasium/spaces/multi_discrete.py @@ -172,7 +172,7 @@ def from_jsonable( self, sample_n: list[Sequence[int]] ) -> list[NDArray[np.integer[Any]]]: """Convert a JSONable data type to a batch of samples from this space.""" - return [np.array(sample) for sample in sample_n] + return [np.array(sample, dtype=np.int64) for sample in sample_n] def __repr__(self): """Gives a string representation of this space.""" diff --git a/gymnasium/utils/env_checker.py b/gymnasium/utils/env_checker.py index e447f7440..c161c2128 100644 --- a/gymnasium/utils/env_checker.py +++ b/gymnasium/utils/env_checker.py @@ -30,40 +30,45 @@ ) -def data_equivalence(data_1, data_2) -> bool: +def data_equivalence(data_1, data_2, exact: bool = False) -> bool: """Assert equality between data 1 and 2, i.e observations, actions, info. Args: data_1: data structure 1 data_2: data structure 2 + exact: whether to compare arrays exactly; if false, compares with an absolute and relative tolerance of 1e-5 (for more information check [np.allclose](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)). Returns: If observation 1 and 2 are equivalent """ - if type(data_1) is type(data_2): - if isinstance(data_1, dict): - return data_1.keys() == data_2.keys() and all( - data_equivalence(data_1[k], data_2[k]) for k in data_1.keys() - ) - elif isinstance(data_1, (tuple, list)): - return len(data_1) == len(data_2) and all( - data_equivalence(o_1, o_2) for o_1, o_2 in zip(data_1, data_2) - ) - elif isinstance(data_1, np.ndarray): - if data_1.shape == data_2.shape and data_1.dtype == data_2.dtype: - if data_1.dtype == object: - return all(data_equivalence(a, b) for a, b in zip(data_1, data_2)) - else: - return np.allclose(data_1, data_2, atol=0.00001) + if type(data_1) is not type(data_2): + return False + if isinstance(data_1, dict): + return data_1.keys() == data_2.keys() and all( + data_equivalence(data_1[k], data_2[k], exact) for k in data_1.keys() + ) + elif isinstance(data_1, (tuple, list)): + return len(data_1) == len(data_2) and all( + data_equivalence(o_1, o_2, exact) for o_1, o_2 in zip(data_1, data_2) + ) + elif isinstance(data_1, np.ndarray): + if data_1.shape == data_2.shape and data_1.dtype == data_2.dtype: + if data_1.dtype == object: + return all( + data_equivalence(a, b, exact) for a, b in zip(data_1, data_2) + ) else: - return False + if exact: + return np.all(data_1 == data_2) + else: + return np.allclose(data_1, data_2, rtol=1e-5, atol=1e-5) else: - return data_1 == data_2 + return False else: - return False + return data_1 == data_2 -def check_reset_seed(env: gym.Env): +def check_reset_seed_determinism(env: gym.Env): """Check that the environment can be reset with a seed. Args: @@ -84,12 +89,9 @@ obs_1 in env.observation_space ), "The observation returned by `env.reset(seed=123)` is not within the observation space."
assert ( - env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] - is not None + env.unwrapped._np_random is not None ), "Expects the random number generator to have been generated given a seed was passed to reset. Mostly likely the environment reset function does not call `super().reset(seed=seed)`." - seed_123_rng = deepcopy( - env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] - ) + seed_123_rng = deepcopy(env.unwrapped._np_random) obs_2, info = env.reset(seed=123) assert ( @@ -100,7 +102,7 @@ obs_1, obs_2 ), "Using `env.reset(seed=123)` is non-deterministic as the observations are not equivalent." assert ( - env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] + env.unwrapped._np_random.bit_generator.state == seed_123_rng.bit_generator.state ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random generates are not same when the same seeds are passed to `env.reset`." @@ -109,7 +111,7 @@ obs_3 in env.observation_space ), "The observation returned by `env.reset(seed=456)` is not within the observation space." assert ( - env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] + env.unwrapped._np_random.bit_generator.state != seed_123_rng.bit_generator.state ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random number generators are not different when different seeds are passed to `env.reset`." @@ -160,6 +162,42 @@ def check_reset_options(env: gym.Env): ) +def check_step_determinism(env: gym.Env, seed=123): + """Check that the environment steps deterministically after reset. + + Note: This check assumes that seeded `reset()` is deterministic (it must have passed `check_reset_seed`) and that `step()` returns valid values (passed `env_step_passive_checker`). + Note: A single step should be enough to assert that the state transition function is deterministic (at least for most environments). + + Raises: + AssertionError: The environment cannot be stepped deterministically after resetting with a random seed, + or it truncates after 1 step. + """ + if env.spec is not None and env.spec.nondeterministic is True: + return + + env.action_space.seed(seed) + action = env.action_space.sample() + + env.reset(seed=seed) + obs_0, rew_0, term_0, trunc_0, info_0 = env.step(action) + seeded_rng: np.random.Generator = deepcopy(env.unwrapped._np_random) + + env.reset(seed=seed) + obs_1, rew_1, term_1, trunc_1, info_1 = env.step(action) + + assert ( + env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportOptionalMemberAccess] + == seeded_rng.bit_generator.state + ), "The `.np_random` has not been properly updated after step." + assert data_equivalence(obs_0, obs_1), "step observation is not deterministic." + assert data_equivalence(rew_0, rew_1), "step reward is not deterministic." + assert data_equivalence(term_0, term_1), "step terminal is not deterministic." + assert ( + trunc_0 is False and trunc_1 is False + ), "Environment truncates after 1 step, something has gone very wrong." + assert data_equivalence(info_0, info_1), "step info is not deterministic." + + def check_reset_return_info_deprecation(env: gym.Env): """Makes sure support for deprecated `return_info` argument is dropped.
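The new `check_step_determinism` helper can also be called directly, and `data_equivalence` gained the `exact` flag shown above. A minimal sketch of both, assuming a registered environment such as `CartPole-v1`:

```python
import numpy as np

import gymnasium as gym
from gymnasium.utils.env_checker import check_step_determinism, data_equivalence

# Resets twice with the same seed, applies the same sampled action once,
# and raises AssertionError if obs/reward/termination/info differ.
env = gym.make("CartPole-v1")
check_step_determinism(env, seed=123)

# By default arrays are compared with np.allclose (rtol=atol=1e-5);
# exact=True switches to element-wise equality.
a = np.array([1.0])
assert data_equivalence(a, a + 1e-7)                  # within tolerance
assert not data_equivalence(a, a + 1e-7, exact=True)  # bitwise comparison fails
```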
@@ -306,13 +344,16 @@ def check_env( check_seed_deprecation(env) check_reset_return_info_deprecation(env) check_reset_return_type(env) - check_reset_seed(env) + check_reset_seed_determinism(env) check_reset_options(env) # ============ Check the returned values =============== env_reset_passive_checker(env) env_step_passive_checker(env, env.action_space.sample()) + # ==== Check the step method ==== + check_step_determinism(env) + # ==== Check the render method and the declared render modes ==== if not skip_render_check: if env.render_mode is not None: diff --git a/gymnasium/utils/env_match.py b/gymnasium/utils/env_match.py index f0c6e21bf..60c522334 100644 --- a/gymnasium/utils/env_match.py +++ b/gymnasium/utils/env_match.py @@ -59,18 +59,24 @@ def check_environments_match( assert skip_obs or data_equivalence( obs_a, obs_b - ), "resetting observation is not equivalent" + ), f"resetting observation is not equivalent, observation_a = {obs_a}, observation_b = {obs_b}" if info_comparison == "equivalence": - assert data_equivalence(info_a, info_b), "resetting info is not equivalent" + assert data_equivalence( + info_a, info_b + ), f"resetting info is not equivalent, info_a = {info_a}, info_b = {info_b}" elif info_comparison == "superset": for key in info_a: assert data_equivalence( info_a[key], info_b[key] - ), "resetting info is not a superset" + ), f"resetting info is not a superset, key {key} present in info_a with value = {info_a[key]}, in info_b with value = {info_b[key]}" elif info_comparison == "keys-equivalance": - assert info_a.keys() == info_b.keys(), "resetting info keys are not equivalent" + assert ( + info_a.keys() == info_b.keys() + ), f"resetting info keys are not equivalent, info_a's keys are {info_a.keys()}, info_b's keys are {info_b.keys()}" elif info_comparison == "keys-superset": - assert info_b.keys() >= info_a.keys(), "resetting info keys are not a superset" + assert ( + info_b.keys() >= info_a.keys() + ), f"resetting info keys are not a superset, keys not present in info_b are: {info_a.keys() - info_b.keys()}" if not skip_render: assert ( @@ -83,60 +89,62 @@ obs_b, rew_b, terminal_b, truncated_b, info_b = env_b.step(action) assert skip_obs or data_equivalence( obs_a, obs_b - ), "stepping observation is not equivalent" + ), f"stepping observation is not equivalent in step = {step}, observation_a = {obs_a}, observation_b = {obs_b}" assert skip_rew or data_equivalence( rew_a, rew_b - ), "stepping reward is not equivalent" + ), f"stepping reward is not equivalent in step = {step}, reward_a = {rew_a}, reward_b = {rew_b}" assert ( skip_terminal or terminal_a == terminal_b - ), "stepping terminal is not equivalent" + ), f"stepping terminal is not equivalent in step = {step}, terminal_a = {terminal_a}, terminal_b = {terminal_b}" assert ( skip_truncated or truncated_a == truncated_b - ), "stepping truncated is not equivalent" + ), f"stepping truncated is not equivalent in step = {step}, truncated_a = {truncated_a}, truncated_b = {truncated_b}" if info_comparison == "equivalence": - assert data_equivalence(info_a, info_b), "stepping info is not equivalent" + assert data_equivalence( + info_a, info_b + ), f"stepping info is not equivalent in step = {step}, info_a = {info_a}, info_b = {info_b}" elif info_comparison == "superset": for key in info_a: assert data_equivalence( info_a[key], info_b[key] - ), "stepping info is not a superset" + ), f"stepping info is not a superset in step = {step}, key {key} present in info_a with value = {info_a[key]}, in
info_b with value = {info_b[key]}" elif info_comparison == "keys-equivalance": assert ( info_a.keys() == info_b.keys() - ), "stepping info keys are not equivalent" + ), f"stepping info keys are not equivalent in step = {step}, info_a's keys are {info_a.keys()}, info_b's keys are {info_b.keys()}" elif info_comparison == "keys-superset": assert ( info_b.keys() >= info_a.keys() - ), "stepping info keys are not a superset" + ), f"stepping info keys are not a superset in step = {step}, keys not present in info_b are: {info_a.keys() - info_b.keys()}" if not skip_render: assert ( env_a.render() == env_b.render() - ).all(), "stepping render is not equivalent" + ).all(), f"stepping render is not equivalent in step = {step}" if terminal_a or truncated_a or terminal_b or truncated_b: obs_a, info_a = env_a.reset(seed=seed) obs_b, info_b = env_b.reset(seed=seed) assert skip_obs or data_equivalence( obs_a, obs_b - ), "resetting observation is not equivalent" + ), f"resetting observation is not equivalent in step = {step}, observation_a = {obs_a}, observation_b = {obs_b}" if info_comparison == "equivalence": assert data_equivalence( info_a, info_b - ), "resetting info is not equivalent" + ), f"resetting info is not equivalent in step = {step}, info_a = {info_a}, info_b = {info_b}" elif info_comparison == "superset": for key in info_a: assert data_equivalence( info_a[key], info_b[key] - ), "resetting info is not a superset" + ), f"resetting info is not a superset in step = {step}, key {key} present in info_a with value = {info_a[key]}, in info_b with value = {info_b[key]}" elif info_comparison == "keys-equivalance": assert ( info_a.keys() == info_b.keys() - ), "resetting info keys are not equivalent" + ), f"resetting info keys are not equivalent in step = {step}, info_a's keys are {info_a.keys()}, info_b's keys are {info_b.keys()}" elif info_comparison == "keys-superset": assert ( info_b.keys() >= info_a.keys() - ), "resetting info keys are not a superset" + ), f"resetting info keys are not a superset in step = {step}, keys not present in info_b are: {info_a.keys() - info_b.keys()}" if not skip_render: assert ( env_a.render() == env_b.render() - ).all(), "resetting render is not equivalent" + ).all(), f"resetting render is not equivalent in step = {step}" diff --git a/gymnasium/vector/async_vector_env.py b/gymnasium/vector/async_vector_env.py index 4edef093b..9bd28fb02 100644 --- a/gymnasium/vector/async_vector_env.py +++ b/gymnasium/vector/async_vector_env.py @@ -197,6 +197,16 @@ def __init__( self._state = AsyncState.DEFAULT self._check_spaces() + @property + def np_random_seed(self) -> tuple[int, ...]: + """Returns the seeds of the wrapped envs.""" + return self.get_attr("np_random_seed") + + @property + def np_random(self) -> tuple[np.random.Generator, ...]: + """Returns the numpy random number generators of the wrapped envs.""" + return self.get_attr("np_random") + def reset( self, *, @@ -240,7 +250,9 @@ def reset_async( seed = [None for _ in range(self.num_envs)] elif isinstance(seed, int): seed = [seed + i for i in range(self.num_envs)] - assert len(seed) == self.num_envs + assert ( + len(seed) == self.num_envs + ), f"If seeds are passed as a list the length must match num_envs={self.num_envs} but got length={len(seed)}."
if self._state != AsyncState.DEFAULT: raise AlreadyPendingCallError( @@ -472,7 +484,7 @@ def call_wait(self, timeout: int | float | None = None) -> tuple[Any, ...]: return results - def get_attr(self, name: str): + def get_attr(self, name: str) -> tuple[Any, ...]: """Get a property from each parallel environment. Args: diff --git a/gymnasium/vector/sync_vector_env.py b/gymnasium/vector/sync_vector_env.py index a9f234f2e..fd3e5586a 100644 --- a/gymnasium/vector/sync_vector_env.py +++ b/gymnasium/vector/sync_vector_env.py @@ -100,6 +100,16 @@ def __init__( self._autoreset_envs = np.zeros((self.num_envs,), dtype=np.bool_) + @property + def np_random_seed(self) -> tuple[int, ...]: + """Returns the seeds of the wrapped envs.""" + return self.get_attr("np_random_seed") + + @property + def np_random(self) -> tuple[np.random.Generator, ...]: + """Returns the numpy random number generators of the wrapped envs.""" + return self.get_attr("np_random") + def reset( self, *, @@ -122,7 +132,9 @@ def reset( seed = [None for _ in range(self.num_envs)] elif isinstance(seed, int): seed = [seed + i for i in range(self.num_envs)] - assert len(seed) == self.num_envs + assert ( + len(seed) == self.num_envs + ), f"If seeds are passed as a list the length must match num_envs={self.num_envs} but got length={len(seed)}." self._terminations = np.zeros((self.num_envs,), dtype=np.bool_) self._truncations = np.zeros((self.num_envs,), dtype=np.bool_) @@ -211,7 +223,7 @@ def call(self, name: str, *args: Any, **kwargs: Any) -> tuple[Any, ...]: return tuple(results) - def get_attr(self, name: str) -> Any: + def get_attr(self, name: str) -> tuple[Any, ...]: """Get a property from each parallel environment. Args: diff --git a/gymnasium/vector/utils/shared_memory.py b/gymnasium/vector/utils/shared_memory.py index 6f4d9c8a1..c159f7093 100644 --- a/gymnasium/vector/utils/shared_memory.py +++ b/gymnasium/vector/utils/shared_memory.py @@ -148,20 +148,25 @@ def _read_base_from_shared_memory( @read_from_shared_memory.register(Tuple) def _read_tuple_from_shared_memory(space: Tuple, shared_memory, n: int = 1): - return tuple( + subspace_samples = tuple( read_from_shared_memory(subspace, memory, n=n) for (memory, subspace) in zip(shared_memory, space.spaces) ) + return tuple(zip(*subspace_samples)) @read_from_shared_memory.register(Dict) def _read_dict_from_shared_memory(space: Dict, shared_memory, n: int = 1): - return OrderedDict( + subspace_samples = OrderedDict( [ (key, read_from_shared_memory(subspace, shared_memory[key], n=n)) for (key, subspace) in space.spaces.items() ] ) + return tuple( + OrderedDict({key: subspace_samples[key][i] for key in space.keys()}) + for i in range(n) + ) @read_from_shared_memory.register(Text) diff --git a/gymnasium/vector/vector_env.py b/gymnasium/vector/vector_env.py index 31cc4fcad..7127dd9aa 100644 --- a/gymnasium/vector/vector_env.py +++ b/gymnasium/vector/vector_env.py @@ -104,17 +104,18 @@ class VectorEnv(Generic[ObsType, ActType, ArrayType]): num_envs: int _np_random: np.random.Generator | None = None + _np_random_seed: int | None = None def reset( self, *, - seed: int | list[int] | None = None, + seed: int | None = None, options: dict[str, Any] | None = None, ) -> tuple[ObsType, dict[str, Any]]: # type: ignore """Reset all parallel environments and return a batch of initial observations and info. 
Args: - seed: The environment reset seeds + seed: The environment reset seed options: If to return the options Returns: @@ -133,7 +134,7 @@ def reset( {} """ if seed is not None: - self._np_random, seed = seeding.np_random(seed) + self._np_random, self._np_random_seed = seeding.np_random(seed) def step( self, actions: ActType @@ -210,6 +211,20 @@ def close_extras(self, **kwargs: Any): """Clean up the extra resources e.g. beyond what's in this base class.""" pass + @property + def np_random_seed(self) -> int | None: + """Returns the environment's internal :attr:`_np_random_seed` that if not set will first initialise with a random int as seed. + + If :attr:`np_random_seed` was set directly instead of through :meth:`reset` or :meth:`set_np_random_through_seed`, + the seed will take the value -1. + + Returns: + int: the seed of the current `np_random` or -1, if the seed of the rng is unknown + """ + if self._np_random_seed is None: + self._np_random, self._np_random_seed = seeding.np_random() + return self._np_random_seed + @property def np_random(self) -> np.random.Generator: """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed. @@ -218,12 +233,13 @@ def np_random(self) -> np.random.Generator: Instances of `np.random.Generator` """ if self._np_random is None: - self._np_random, seed = seeding.np_random() + self._np_random, self._np_random_seed = seeding.np_random() return self._np_random @np_random.setter def np_random(self, value: np.random.Generator): self._np_random = value + self._np_random_seed = -1 @property def unwrapped(self): @@ -430,6 +446,19 @@ def render_mode(self) -> tuple[RenderFrame, ...] | None: """Returns the `render_mode` from the base environment.""" return self.env.render_mode + @property + def np_random(self) -> np.random.Generator: + """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed. + + Returns: + Instances of `np.random.Generator` + """ + return self.env.np_random + + @np_random.setter + def np_random(self, value: np.random.Generator): + self.env.np_random = value + class VectorObservationWrapper(VectorWrapper): """Wraps the vectorized environment to allow a modular transformation of the observation. 
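The seed bookkeeping added above is observable from user code. A short sketch, assuming `CartPole-v1` and that the base `Env.reset(seed=...)` records the seed it was given: an int seed passed to a vector `reset` is expanded to `seed + i` per sub-environment, and the new properties forward the per-env values via `get_attr`:

```python
import gymnasium as gym

envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
envs.reset(seed=123)

# Per-env seeds recorded by each sub-environment's reset(seed=...):
assert envs.np_random_seed == (123, 124, 125)

# Per-env generators, one np.random.Generator per sub-environment:
rngs = envs.np_random
assert len(rngs) == 3

envs.close()
```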
diff --git a/gymnasium/wrappers/__init__.py b/gymnasium/wrappers/__init__.py index 2709d8be3..09453ee95 100644 --- a/gymnasium/wrappers/__init__.py +++ b/gymnasium/wrappers/__init__.py @@ -73,11 +73,11 @@ TransformAction, ) from gymnasium.wrappers.transform_observation import ( + AddRenderObservation, DtypeObservation, FilterObservation, FlattenObservation, GrayscaleObservation, - RenderObservation, RescaleObservation, ReshapeObservation, ResizeObservation, @@ -99,7 +99,7 @@ "TransformObservation", "MaxAndSkipObservation", "NormalizeObservation", - "RenderObservation", + "AddRenderObservation", "ResizeObservation", "ReshapeObservation", "RescaleObservation", @@ -142,7 +142,7 @@ _renamed_wrapper = { "AutoResetWrapper": "Autoreset", "FrameStack": "FrameStackObservation", - "PixelObservationWrapper": "RenderObservation", + "PixelObservationWrapper": "AddRenderObservation", "VectorListInfo": "vector.DictInfoToList", } diff --git a/gymnasium/wrappers/stateful_observation.py b/gymnasium/wrappers/stateful_observation.py index a22fc3320..9414ae17f 100644 --- a/gymnasium/wrappers/stateful_observation.py +++ b/gymnasium/wrappers/stateful_observation.py @@ -346,7 +346,8 @@ class FrameStackObservation( Change logs: * v0.15.0 - Initially add as ``FrameStack`` with support for lz4 * v1.0.0 - Rename to ``FrameStackObservation`` and remove lz4 and ``LazyFrame`` support - along with adding the ``padding_type`` parameter + along with adding the ``padding_type`` parameter + """ def __init__( diff --git a/gymnasium/wrappers/transform_observation.py b/gymnasium/wrappers/transform_observation.py index 9d8c3a51d..ec1f50256 100644 --- a/gymnasium/wrappers/transform_observation.py +++ b/gymnasium/wrappers/transform_observation.py @@ -31,7 +31,7 @@ "ReshapeObservation", "RescaleObservation", "DtypeObservation", - "RenderObservation", + "AddRenderObservation", ] @@ -607,7 +607,7 @@ def __init__(self, env: gym.Env[ObsType, ActType], dtype: Any): ) -class RenderObservation( +class AddRenderObservation( TransformObservation[WrapperObsType, ActType, ObsType], gym.utils.RecordConstructorArgs, ): @@ -620,7 +620,7 @@ class RenderObservation( Example - Replace the observation with the rendered image: >>> env = gym.make("CartPole-v1", render_mode="rgb_array") - >>> env = RenderObservation(env, render_only=True) + >>> env = AddRenderObservation(env, render_only=True) >>> env.observation_space Box(0, 255, (400, 600, 3), uint8) >>> obs, _ = env.reset(seed=123) @@ -634,7 +634,7 @@ class RenderObservation( Example - Add the rendered image to the original observation as a dictionary item: >>> env = gym.make("CartPole-v1", render_mode="rgb_array") - >>> env = RenderObservation(env, render_only=False) + >>> env = AddRenderObservation(env, render_only=False) >>> env.observation_space Dict('pixels': Box(0, 255, (400, 600, 3), uint8), 'state': Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)) >>> obs, info = env.reset(seed=123) @@ -651,7 +651,7 @@ class RenderObservation( Change logs: * v0.15.0 - Initially added as ``PixelObservationWrapper`` - * v1.0.0 - Renamed to ``RenderObservation`` + * v1.0.0 - Renamed to ``AddRenderObservation`` """ def __init__( @@ -661,7 +661,7 @@ def __init__( render_key: str = "pixels", obs_key: str = "state", ): - """Constructor of the pixel observation wrapper. + """Constructor of the add render observation wrapper. Args: env: The environment to wrap. 
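Migration note for the rename above: the old `PixelObservationWrapper`/`RenderObservation` names are mapped to the new one via `_renamed_wrapper`, and the wrapper behaves as in its doctests. A minimal sketch based on those doctests:

```python
import gymnasium as gym
from gymnasium.wrappers import AddRenderObservation  # formerly RenderObservation / PixelObservationWrapper

env = gym.make("CartPole-v1", render_mode="rgb_array")
env = AddRenderObservation(env, render_only=False)

obs, info = env.reset(seed=123)
# The original observation is kept under "state" and the rendered
# frame is added under "pixels" (a (400, 600, 3) uint8 array here).
assert sorted(obs.keys()) == ["pixels", "state"]
```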
diff --git a/gymnasium/wrappers/vector/vectorize_reward.py b/gymnasium/wrappers/vector/vectorize_reward.py index b535d175a..8ec9173dc 100644 --- a/gymnasium/wrappers/vector/vectorize_reward.py +++ b/gymnasium/wrappers/vector/vectorize_reward.py @@ -14,18 +14,7 @@ class TransformReward(VectorRewardWrapper): """A reward wrapper that allows a custom function to modify the step reward. - Example: - Without reward transformation: - >>> import gymnasium as gym - >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3) - >>> _ = envs.action_space.seed(123) - >>> obs, info = envs.reset(seed=123) - >>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample()) - >>> envs.close() - >>> rew - array([-0.01330088, -0.07963027, -0.03127944]) - - With reward transformation: + Example with reward transformation: >>> import gymnasium as gym >>> from gymnasium.spaces import Box >>> def scale_and_shift(rew): @@ -62,18 +51,7 @@ def rewards(self, reward: ArrayType) -> ArrayType: class VectorizeTransformReward(VectorRewardWrapper): """Vectorizes a single-agent transform reward wrapper for vector environments. - Example: - Without reward transformation: - >>> import gymnasium as gym - >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3) - >>> _ = envs.action_space.seed(123) - >>> obs, info = envs.reset(seed=123) - >>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample()) - >>> envs.close() - >>> rew - array([-0.01330088, -0.07963027, -0.03127944]) - - Adding a transform that applies a ReLU to the reward: + An example that applies a ReLU to the reward: >>> import gymnasium as gym >>> from gymnasium.wrappers import TransformReward >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3) @@ -113,21 +91,7 @@ def rewards(self, reward: ArrayType) -> ArrayType: class ClipReward(VectorizeTransformReward): """A wrapper that clips the rewards for an environment between an upper and lower bound. - Example: - Without clipping rewards: - >>> import numpy as np - >>> import gymnasium as gym - >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3) - >>> _ = envs.action_space.seed(123) - >>> obs, info = envs.reset(seed=123) - >>> for _ in range(10): - ... obs, rew, term, trunc, info = envs.step(0.5 * np.ones((3, 1))) - ...
- >>> envs.close() - >>> rew - array([-0.025, -0.025, -0.025]) - - With clipped rewards: + Example with clipped rewards: >>> import numpy as np >>> import gymnasium as gym >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3) diff --git a/tests/envs/mujoco/test_mujoco_v5.py b/tests/envs/mujoco/test_mujoco_v5.py index 20ea9c028..44ba9a6b2 100644 --- a/tests/envs/mujoco/test_mujoco_v5.py +++ b/tests/envs/mujoco/test_mujoco_v5.py @@ -7,6 +7,8 @@ import gymnasium as gym from gymnasium.envs.mujoco.mujoco_env import BaseMujocoEnv, MujocoEnv +from gymnasium.envs.mujoco.mujoco_py_env import BaseMujocoPyEnv +from gymnasium.envs.mujoco.utils import check_mujoco_reset_state from gymnasium.error import Error from gymnasium.utils.env_checker import check_env from gymnasium.utils.env_match import check_environments_match @@ -88,7 +90,7 @@ def test_verify_info_y_position(env_id: str): def test_verify_info_x_velocity(env_name: str, version: str): """Asserts that the environment `info['x_velocity']` is properly assigned.""" env = gym.make(f"{env_name}-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() old_x = env.data.qpos[0] @@ -105,7 +107,7 @@ def test_verify_info_x_velocity(env_name: str, version: str): def test_verify_info_y_velocity(env_id: str): """Asserts that the environment `info['y_velocity']` is properly assigned.""" env = gym.make(env_id).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() old_y = env.data.qpos[1] @@ -121,7 +123,7 @@ def test_verify_info_y_velocity(env_id: str): def test_verify_info_xy_velocity_xpos(env_id: str): """Asserts that the environment `info['x/y_velocity']` is properly assigned, for the ant environment which uses kinmatics for the velocity.""" env = gym.make(env_id).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() old_xy = env.get_body_com("torso")[:2].copy() @@ -144,7 +146,7 @@ def mass_center(model, data): return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy() env = gym.make(env_id).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() old_xy = mass_center(env.model, env.data) @@ -176,7 +178,7 @@ def mass_center(model, data): def test_verify_reward_survive(env_name: str, version: str): """Assert that `reward_survive` is 0 on `terminal` states and not 0 on non-`terminal` states.""" env = gym.make(f"{env_name}-{version}", reset_noise_scale=0).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset(seed=0) env.action_space.seed(1) @@ -357,7 +359,7 @@ def test_ant_com(version: str): """Verify the kinmatic behaviour of the ant.""" # `env` contains `data : MjData` and `model : MjModel` env = gym.make(f"Ant-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() # randomly initlizies the `data.qpos` and `data.qvel`, calls mujoco.mj_forward(env.model, env.data) x_position_before = env.data.qpos[0] @@ -378,7 +380,7 @@ def test_ant_com(version: str): def test_set_state(version: str): """Simple Test to verify that `mujocoEnv.set_state()` works correctly.""" env = gym.make(f"Hopper-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() new_qpos = np.array( 
[0.00136962, 1.24769787, -0.00459026, -0.00483472, 0.0031327, 0.00412756] @@ -401,7 +403,7 @@ def test_set_state(version: str): def test_distance_from_origin_info(env_id: str): """Verify that `info["distance_from_origin"]` is correct.""" env = gym.make(env_id).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() _, _, _, _, info = env.step(env.action_space.sample()) @@ -483,7 +485,7 @@ def test_reset_info(env_name: str, version: str): def test_inverted_double_pendulum_max_height(version: str): """Verify the max height of Inverted Double Pendulum.""" env = gym.make(f"InvertedDoublePendulum-{version}", reset_noise_scale=0).unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.reset() y = env.data.site_xpos[0][2] @@ -494,7 +496,7 @@ def test_inverted_double_pendulum_max_height(version: str): def test_inverted_double_pendulum_max_height_old(version: str): """Verify the max height of Inverted Double Pendulum (v4 does not have the `reset_noise_scale` argument).""" env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) env.set_state(env.init_qpos, env.init_qvel) y = env.data.site_xpos[0][2] @@ -506,7 +508,7 @@ def test_inverted_double_pendulum_max_height_old(version: str): def test_model_object_count(version: str): """Verify that all the objects of the model are loaded, mostly useful when using a non-MuJoCo simulator.""" env = gym.make(f"Ant-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 15 assert env.model.nv == 14 assert env.model.nu == 8 @@ -517,7 +519,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 0 env = gym.make(f"HalfCheetah-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 9 assert env.model.nv == 9 assert env.model.nu == 6 @@ -528,7 +530,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 0 env = gym.make(f"Hopper-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 6 assert env.model.nv == 6 assert env.model.nu == 3 @@ -539,7 +541,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 0 env = gym.make(f"Humanoid-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 24 assert env.model.nv == 23 assert env.model.nu == 17 @@ -550,7 +552,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 2 env = gym.make(f"HumanoidStandup-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 24 assert env.model.nv == 23 assert env.model.nu == 17 @@ -561,7 +563,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 2 env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 3 assert env.model.nv == 3 assert env.model.nu == 1 @@ -572,7 +574,7 @@ def test_model_object_count(version: str): assert env.model.ntendon == 0 env = gym.make(f"InvertedPendulum-{version}").unwrapped - assert
isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 2 assert env.model.nv == 2 assert env.model.nu == 1 @@ -583,40 +585,43 @@ def test_model_object_count(version: str): assert env.model.ntendon == 0 env = gym.make(f"Pusher-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 11 assert env.model.nv == 11 assert env.model.nu == 7 assert env.model.nbody == 13 - assert env.model.nbvh == 18 + if mujoco.__version__ >= "3.1.2": + assert env.model.nbvh == 8 assert env.model.njnt == 11 assert env.model.ngeom == 21 assert env.model.ntendon == 0 env = gym.make(f"Reacher-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 4 assert env.model.nv == 4 assert env.model.nu == 2 assert env.model.nbody == 5 - assert env.model.nbvh == 5 + if mujoco.__version__ >= "3.1.2": + assert env.model.nbvh == 3 assert env.model.njnt == 4 assert env.model.ngeom == 10 assert env.model.ntendon == 0 env = gym.make(f"Swimmer-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 5 assert env.model.nv == 5 assert env.model.nu == 2 assert env.model.nbody == 4 - assert env.model.nbvh == 4 + if mujoco.__version__ >= "3.1.2": + assert env.model.nbvh == 0 assert env.model.njnt == 5 assert env.model.ngeom == 4 assert env.model.ntendon == 0 env = gym.make(f"Walker2d-{version}").unwrapped - assert isinstance(env, BaseMujocoEnv) + assert isinstance(env, (BaseMujocoEnv, BaseMujocoPyEnv)) assert env.model.nq == 9 assert env.model.nv == 9 assert env.model.nu == 6 @@ -695,3 +700,11 @@ def test_reset_noise_scale(env_id): assert np.all(env.data.qpos == env.init_qpos) assert np.all(env.data.qvel == env.init_qvel) + + +@pytest.mark.parametrize("env_name", ALL_MUJOCO_ENVS) +@pytest.mark.parametrize("version", ["v5", "v4"]) +def test_reset_state(env_name: str, version: str): + """Asserts that `reset()` properly resets the internal state.""" + env = gym.make(f"{env_name}-{version}") + check_mujoco_reset_state(env) diff --git a/tests/envs/registration/test_make_vec.py b/tests/envs/registration/test_make_vec.py index b4aeba60a..5d963384f 100644 --- a/tests/envs/registration/test_make_vec.py +++ b/tests/envs/registration/test_make_vec.py @@ -114,6 +114,23 @@ def test_make_vec_vectorization_mode(): gym.make_vec("CartPole-v1", vectorization_mode=123) +def test_make_vec_render_mode(): + """Tests that `gym.make_vec` correctly propagates the `render_mode` to the vector environment.""" + envs = gym.make_vec( + "CartPole-v1", vectorization_mode=VectorizeMode.VECTOR_ENTRY_POINT + ) + assert envs.render_mode is None + envs.close() + + envs = gym.make_vec( + "CartPole-v1", + render_mode="rgb_array", + vectorization_mode=VectorizeMode.VECTOR_ENTRY_POINT, + ) + assert envs.render_mode == "rgb_array" + envs.close() + + def test_make_vec_wrappers(): """Tests that the `gym.make_vec` wrappers parameter works.""" env = gym.make_vec("CartPole-v1", num_envs=2, vectorization_mode="sync") diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py index 312b4e78e..22677b328 100644 --- a/tests/envs/test_env_implementation.py +++ b/tests/envs/test_env_implementation.py @@ -223,3 +223,66 @@ def test_invalid_customizable_resets(env_name: str, low_high: list): # match=re.escape(f"Lower bound ({low}) must be lower than higher bound ({high}).") # match=f"An option ({x}) could not be converted to a float."
env.reset(options={"low": low, "high": high}) + + +def test_cartpole_vector_equiv(): + env = gym.make("CartPole-v1") + envs = gym.make_vec("CartPole-v1", num_envs=1) + + assert env.action_space == envs.single_action_space + assert env.observation_space == envs.single_observation_space + + # reset + seed = np.random.randint(0, 1000) + obs, info = env.reset(seed=seed) + vec_obs, vec_info = envs.reset(seed=seed) + + assert obs in env.observation_space + assert vec_obs in envs.observation_space + assert np.all(obs == vec_obs[0]) + assert info == vec_info + + assert np.all(env.unwrapped.state == envs.unwrapped.state[:, 0]) + + # step + for i in range(100): + action = env.action_space.sample() + assert np.array([action]) in envs.action_space + + obs, reward, term, trunc, info = env.step(action) + vec_obs, vec_reward, vec_term, vec_trunc, vec_info = envs.step( + np.array([action]) + ) + + assert obs in env.observation_space + assert vec_obs in envs.observation_space + assert np.all(obs == vec_obs[0]) + assert reward == vec_reward + assert term == vec_term + assert trunc == vec_trunc + assert info == vec_info + + assert np.all(env.unwrapped.state == envs.unwrapped.state[:, 0]) + + if term: + break + + obs, info = env.reset() + # the vector action shouldn't matter as autoreset + vec_obs, vec_reward, vec_term, vec_trunc, vec_info = envs.step( + envs.action_space.sample() + ) + + assert obs in env.observation_space + assert vec_obs in envs.observation_space + assert np.all(obs == vec_obs[0]) + assert vec_reward == np.array([0]) + assert vec_term == np.array([False]) + assert vec_trunc == np.array([False]) + assert info == vec_info + + assert np.all(env.unwrapped.state == envs.unwrapped.state[:, 0]) + + env.close() + envs.close() diff --git a/tests/test_core.py b/tests/test_core.py index 9916620c1..a91d3d6a7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -12,6 +12,7 @@ from gymnasium.core import ActType, ObsType, WrapperActType, WrapperObsType from gymnasium.spaces import Box from gymnasium.utils import seeding +from gymnasium.utils.seeding import np_random from gymnasium.wrappers import OrderEnforcing from tests.testing_env import GenericTestEnv @@ -37,17 +38,22 @@ def reset( options: dict | None = None, ) -> tuple[ObsType, dict]: """Resets the environment.""" + super().reset(seed=seed, options=options) return 0, {} -def test_example_env(): +@pytest.fixture +def example_env(): + return ExampleEnv() + + +def test_example_env(example_env): """Tests a gymnasium environment.""" - env = ExampleEnv() - assert env.metadata == {"render_modes": []} - assert env.render_mode is None - assert env.spec is None - assert env._np_random is None # pyright: ignore [reportPrivateUsage] + assert example_env.metadata == {"render_modes": []} + assert example_env.render_mode is None + assert example_env.spec is None + assert example_env._np_random is None # pyright: ignore [reportPrivateUsage] class ExampleWrapper(Wrapper): @@ -77,9 +83,9 @@ def access_hidden_np_random(self): return self._np_random -def test_example_wrapper(): +def test_example_wrapper(example_env): """Tests the gymnasium wrapper works as expected.""" - env = ExampleEnv() + env = example_env wrapper_env = ExampleWrapper(env) assert env.metadata == wrapper_env.metadata @@ -202,3 +208,45 @@ def test_get_set_wrapper_attr(): with pytest.raises(AttributeError): env.unwrapped._disable_render_order_enforcing assert env.get_wrapper_attr("_disable_render_order_enforcing") is True + + +class TestRandomSeeding: + @staticmethod + def 
test_nonempty_seed_retrieved_when_not_set(example_env): + assert example_env.np_random_seed is not None + assert isinstance(example_env.np_random_seed, int) + + @staticmethod + def test_seed_set_at_reset_and_retrieved(example_env): + seed = 42 + example_env.reset(seed=seed) + assert example_env.np_random_seed == seed + # resetting with seed=None means seed remains the same + example_env.reset(seed=None) + assert example_env.np_random_seed == seed + + @staticmethod + def test_seed_cannot_be_set_directly(example_env): + with pytest.raises(AttributeError): + example_env.np_random_seed = 42 + + @staticmethod + def test_negative_seed_retrieved_when_seed_unknown(example_env): + rng, _ = np_random() + example_env.np_random = rng + # seed is unknown + assert example_env.np_random_seed == -1 + + @staticmethod + def test_seeding_works_in_wrapped_envs(example_env): + seed = 42 + wrapper_env = ExampleWrapper(example_env) + wrapper_env.reset(seed=seed) + assert wrapper_env.np_random_seed == seed + # resetting with seed=None means seed remains the same + wrapper_env.reset(seed=None) + assert wrapper_env.np_random_seed == seed + # setting np_random directly makes seed unknown + rng, _ = np_random() + wrapper_env.np_random = rng + assert wrapper_env.np_random_seed == -1 diff --git a/tests/utils/test_env_checker.py b/tests/utils/test_env_checker.py index d348dc0c8..f04f02a79 100644 --- a/tests/utils/test_env_checker.py +++ b/tests/utils/test_env_checker.py @@ -14,8 +14,9 @@ check_reset_options, check_reset_return_info_deprecation, check_reset_return_type, - check_reset_seed, + check_reset_seed_determinism, check_seed_deprecation, + check_step_determinism, ) from tests.testing_env import GenericTestEnv @@ -106,16 +107,16 @@ def _reset_default_seed(self: GenericTestEnv, seed="Error", options=None): ], ], ) -def test_check_reset_seed(test, func: Callable, message: str): +def test_check_reset_seed_determinism(test, func: Callable, message: str): """Tests that the `check_reset_seed_determinism` function works as expected.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): - check_reset_seed(GenericTestEnv(reset_func=func)) + check_reset_seed_determinism(GenericTestEnv(reset_func=func)) else: with pytest.raises(test, match=f"^{re.escape(message)}$"): - check_reset_seed(GenericTestEnv(reset_func=func)) + check_reset_seed_determinism(GenericTestEnv(reset_func=func)) def _deprecated_return_info( @@ -239,6 +240,32 @@ def test_check_reset_options(): check_reset_options(GenericTestEnv(reset_func=lambda self: (0, {}))) +@pytest.mark.parametrize( + "test,step_func,message", + [ + [ + AssertionError, + lambda self, action: (np.random.normal(), 0, False, False, {}), + "step observation is not deterministic.", + ], + [ + AssertionError, + lambda self, action: (0, np.random.normal(), False, False, {}), + "step reward is not deterministic.", + ], + [ + AssertionError, + lambda self, action: (0, 0, False, False, {"value": np.random.normal()}), + "step info is not deterministic.", + ], + ], +) +def test_check_step_determinism(test, step_func, message: str): + """Tests the check_step_determinism function.""" + with pytest.raises(test, match=f"^{re.escape(message)}$"): + check_step_determinism(GenericTestEnv(step_func=step_func)) + + @pytest.mark.parametrize( "env,message", [ diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py index dba5cdd51..0593b4101 100644 --- a/tests/vector/test_vector_env.py +++ b/tests/vector/test_vector_env.py @@ -122,3 +122,46
@@ def thunk(): ) env.close() + + +@pytest.fixture +def example_env_list(): + """A list of environment constructors for building a vector environment.""" + return [make_env("CartPole-v1", i) for i in range(4)] + + +@pytest.mark.parametrize( + "venv_constructor", + [ + SyncVectorEnv, + partial(AsyncVectorEnv, shared_memory=True), + partial(AsyncVectorEnv, shared_memory=False), + ], +) +def test_random_seeding_basics(venv_constructor, example_env_list): + seed = 42 + vector_env = venv_constructor(example_env_list) + vector_env.reset(seed=seed) + assert vector_env.np_random_seed == tuple( + seed + i for i in range(vector_env.num_envs) + ) + # resetting with seed=None means seed remains the same + vector_env.reset(seed=None) + assert vector_env.np_random_seed == tuple( + seed + i for i in range(vector_env.num_envs) + ) + + +@pytest.mark.parametrize( + "venv_constructor", + [ + SyncVectorEnv, + partial(AsyncVectorEnv, shared_memory=True), + partial(AsyncVectorEnv, shared_memory=False), + ], +) +def test_random_seeds_set_at_retrieval(venv_constructor, example_env_list): + vector_env = venv_constructor(example_env_list) + assert len(set(vector_env.np_random_seed)) == vector_env.num_envs + # The default seed starts at zero; adjust or remove this test if the default seed changes + assert vector_env.np_random_seed == tuple(range(vector_env.num_envs)) diff --git a/tests/vector/utils/test_shared_memory.py b/tests/vector/utils/test_shared_memory.py index c4b732c81..9b3118c64 100644 --- a/tests/vector/utils/test_shared_memory.py +++ b/tests/vector/utils/test_shared_memory.py @@ -22,7 +22,7 @@ "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"] ) def test_shared_memory_create_read_write(space, num, ctx): - """Test the shared memory functions, create, read and write for all of the testing spaces.""" + """Test the shared memory functions, create, read and write for all testing spaces.""" if ctx not in mp.get_all_start_methods(): pytest.skip( f"Multiprocessing start method {ctx} not available on this platform." @@ -41,7 +41,7 @@ def test_shared_memory_create_read_write(space, num, ctx): read_samples = read_from_shared_memory(space, shared_memory, n=num) for read_sample, sample in zip(read_samples, samples): - data_equivalence(read_sample, sample) + assert data_equivalence(read_sample, sample) def test_custom_space(): diff --git a/tests/wrappers/test_render_observation.py b/tests/wrappers/test_add_render_observation.py similarity index 95% rename from tests/wrappers/test_render_observation.py rename to tests/wrappers/test_add_render_observation.py index 629c1dfbe..7ea9b66cf 100644 --- a/tests/wrappers/test_render_observation.py +++ b/tests/wrappers/test_add_render_observation.py @@ -3,7 +3,7 @@ import pytest from gymnasium import spaces -from gymnasium.wrappers import RenderObservation +from gymnasium.wrappers import AddRenderObservation from tests.testing_env import GenericTestEnv @@ -30,7 +30,7 @@ def test_dict_observation(pixels_only, pixel_key="rgb"): # width, height = (320, 240) # The wrapper should only add one observation. - wrapped_env = RenderObservation( + wrapped_env = AddRenderObservation( env, render_key=pixel_key, render_only=pixels_only, @@ -69,7 +69,7 @@ def test_single_array_observation(pixels_only): assert isinstance(env.observation_space, spaces.Box) # The wrapper should only add one observation. - wrapped_env = RenderObservation( + wrapped_env = AddRenderObservation( env, render_key=pixel_key, render_only=pixels_only,
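
The ClipReward docstring example in the first hunk is cut off at a hunk boundary. For reviewers who want to try the vectorized wrapper locally, here is a minimal sketch; the gymnasium.wrappers.vector.ClipReward import path and the min_reward/max_reward keyword names are assumptions mirroring the single-environment ClipReward wrapper, not something this diff confirms.

import numpy as np
import gymnasium as gym
from gymnasium.wrappers.vector import ClipReward  # import path assumed

envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
# Keyword names assumed to mirror the single-env ClipReward wrapper.
envs = ClipReward(envs, min_reward=-0.01, max_reward=0.01)
_ = envs.reset(seed=123)
for _ in range(10):
    obs, rew, term, trunc, info = envs.step(0.5 * np.ones((3, 1)))
envs.close()
print(rew)  # each entry should now lie within [-0.01, 0.01]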
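The seeding tests above pin down the intended np_random_seed semantics: the seed passed to reset() is recorded and survives reset(seed=None), assigning np_random directly makes the seed unknown (-1), and vector environments report one seed per sub-environment. A condensed sketch of the behaviour being asserted, restated from the tests rather than any new API:

import gymnasium as gym
from gymnasium.vector import SyncVectorEnv

env = gym.make("CartPole-v1")
env.reset(seed=42)
assert env.np_random_seed == 42  # the reset seed is recorded
env.reset(seed=None)
assert env.np_random_seed == 42  # seed=None keeps the previous seed
env.close()

envs = SyncVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(3)])
envs.reset(seed=42)
assert envs.np_random_seed == (42, 43, 44)  # seed + i for each sub-environment
envs.close()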
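Since the final hunks rename the RenderObservation wrapper to AddRenderObservation (and the test file to match), a minimal usage sketch of the renamed wrapper may help reviewers. Only the render_key and render_only keywords exercised in the tests above are used; the dict layout of the wrapped observation is an assumption inferred from the test names, so verify against the installed version.

import gymnasium as gym
from gymnasium.wrappers import AddRenderObservation

env = gym.make("CartPole-v1", render_mode="rgb_array")
env = AddRenderObservation(env, render_key="pixels", render_only=False)

obs, info = env.reset(seed=123)
# With render_only=False, the observation is expected to be a dict that keeps
# the original observation and adds the rendered frame under `render_key`.
assert isinstance(obs, dict) and "pixels" in obs
env.close()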