diff --git a/docs/getting_started.md b/docs/getting_started.md
index 83b33e210..be3f36f37 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -66,7 +66,7 @@ This algorithm works by building up a sequence of button presses that do well in
 
 Using ["Proximal Policy Optimization"](https://arxiv.org/abs/1707.06347) by Schulman et al., you can train an agent to play many of the games, though it takes awhile and is much faster with a GPU.
 
-This example requires installing [OpenAI Baselines](https://github.com/openai/baselines). Once installed, you can run it:
+This example requires installing [Stable Baselines](https://github.com/DLR-RM/stable-baselines3). Once installed, you can run it:
 
 ```shell
 python3 -m retro.examples.ppo --game Airstriker-Genesis
diff --git a/retro/examples/trivial_random_agent.py b/retro/examples/trivial_random_agent.py
index 48ee02d30..dc2f64e60 100644
--- a/retro/examples/trivial_random_agent.py
+++ b/retro/examples/trivial_random_agent.py
@@ -5,9 +5,10 @@ def main():
     env = retro.make(game="Airstriker-Genesis")
     env.reset()
     while True:
-        obs, rew, done, info = env.step(env.action_space.sample())
+        action = env.action_space.sample()
+        observation, reward, terminated, truncated, info = env.step(action)
         env.render()
-        if done:
+        if terminated or truncated:
             env.reset()
     env.close()
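
After this patch, `retro/examples/trivial_random_agent.py` as a whole would read roughly as below. This is a sketch assuming the file contains nothing beyond the usual import and entry point around the hunk above; the comments describe the Gymnasium-style 5-tuple that `env.step()` now returns.

```python
# Sketch of the full script after this patch, assuming only the usual
# import and entry point surround the hunk shown in the diff above.
import retro


def main():
    env = retro.make(game="Airstriker-Genesis")
    env.reset()
    while True:
        # Sample a random action from the environment's action space.
        action = env.action_space.sample()
        # Gymnasium-style step() returns five values: the old `done` flag is
        # split into `terminated` (a terminal state was reached) and
        # `truncated` (the episode was cut off, e.g. by a time limit).
        observation, reward, terminated, truncated, info = env.step(action)
        env.render()
        if terminated or truncated:
            env.reset()
    env.close()


if __name__ == "__main__":
    main()
```

The key API change is that the single `done` flag becomes `terminated` and `truncated`, so the reset condition must check both.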
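
Since the docs now point to Stable Baselines3 rather than OpenAI Baselines, a minimal PPO training run against a stable-retro environment might look like the sketch below. This is not the `retro.examples.ppo` script invoked above; it assumes Stable Baselines3 can consume the environment directly, and the policy choice and timestep count are illustrative.

```python
# A minimal sketch, not the repository's retro.examples.ppo script: the
# CnnPolicy choice and total_timesteps value are illustrative assumptions.
import retro
from stable_baselines3 import PPO


def main():
    env = retro.make(game="Airstriker-Genesis")
    # Observations are raw pixels, so use the CNN policy; SB3 applies its own
    # vectorization and channel-transpose wrappers automatically.
    model = PPO(policy="CnnPolicy", env=env, verbose=1)
    model.learn(total_timesteps=100_000)
    env.close()


if __name__ == "__main__":
    main()
```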