From a88e2115f6af9d1df3f3b73734ce584659357172 Mon Sep 17 00:00:00 2001 From: Fintan Horan Date: Thu, 30 Nov 2023 23:57:25 -0500 Subject: [PATCH 1/2] Update getting_started.md - The return values of `env.step()` have changed, including the deprecation of the `done` flag in favor of `terminated` and `truncated`. - Stable Baselines 3 is now the recommended RL library. --- docs/getting_started.md | 2 +- retro/examples/trivial_random_agent.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 83b33e210..be3f36f37 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -66,7 +66,7 @@ This algorithm works by building up a sequence of button presses that do well in Using ["Proximal Policy Optimization"](https://arxiv.org/abs/1707.06347) by Schulman et al., you can train an agent to play many of the games, though it takes awhile and is much faster with a GPU. -This example requires installing [OpenAI Baselines](https://github.com/openai/baselines). Once installed, you can run it: +This example requires installing [Stable Baselines3](https://github.com/DLR-RM/stable-baselines3). 
Once installed, you can run it: ```shell python3 -m retro.examples.ppo --game Airstriker-Genesis diff --git a/retro/examples/trivial_random_agent.py b/retro/examples/trivial_random_agent.py index 48ee02d30..7cf78466e 100644 --- a/retro/examples/trivial_random_agent.py +++ b/retro/examples/trivial_random_agent.py @@ -5,9 +5,10 @@ def main(): env = retro.make(game="Airstriker-Genesis") env.reset() while True: - obs, rew, done, info = env.step(env.action_space.sample()) + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) env.render() - if done: + if terminated: env.reset() env.close() From d9ce0936670edf4fd06d62332e63bf8265909b21 Mon Sep 17 00:00:00 2001 From: Fintan Horan Date: Fri, 1 Dec 2023 12:55:28 -0500 Subject: [PATCH 2/2] Add truncated check to trivial_random_agent.py --- retro/examples/trivial_random_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retro/examples/trivial_random_agent.py b/retro/examples/trivial_random_agent.py index 7cf78466e..dc2f64e60 100644 --- a/retro/examples/trivial_random_agent.py +++ b/retro/examples/trivial_random_agent.py @@ -8,7 +8,7 @@ def main(): action = env.action_space.sample() observation, reward, terminated, truncated, info = env.step(action) env.render() - if terminated: + if terminated or truncated: env.reset() env.close()