diff --git a/docs/.gitignore b/docs/.gitignore index 35290aec8..5b8364a2d 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -4,9 +4,12 @@ __pycache__ build/ _build/ -tutorials/* -!tutorials/*.md -!tutorials/*.py +tutorials/**/*.pickle +tutorials/**/images/ +tutorials/**/*.rst +tutorials/**/*.ipynb +tutorials/**/*.zip +!tutorials/**/README.rst environments/**/list.html environments/**/complete_list.html diff --git a/docs/conf.py b/docs/conf.py index 89a33be43..071d29f7a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,9 +16,10 @@ # -- Project information ----------------------------------------------------- import os +import re from typing import Any, Dict -from furo import gen_tutorials +import sphinx_gallery.gen_rst import gymnasium @@ -43,6 +44,7 @@ "sphinx.ext.githubpages", "myst_parser", "furo.gen_tutorials", + "sphinx_gallery.gen_gallery", "sphinx_github_changelog", ] @@ -52,7 +54,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["tutorials/demo.rst"] +exclude_patterns = ["tutorials/README.rst"] # Napoleon settings napoleon_use_ivar = True @@ -95,10 +97,29 @@ # -- Generate Tutorials ------------------------------------------------- -gen_tutorials.generate( - os.path.dirname(__file__), - os.path.join(os.path.dirname(__file__), "tutorials"), -) +sphinx_gallery.gen_rst.EXAMPLE_HEADER = """ +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "{0}" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_{1}: + +""" + +sphinx_gallery_conf = { + "ignore_pattern": r"__init__\.py", + "examples_dirs": "./tutorials", + "gallery_dirs": "./tutorials", + "show_signature": False, + "show_memory": False, + "min_reported_time": float("inf"), + "filename_pattern": f"{re.escape(os.sep)}run_", + "default_thumb_file": "_static/img/gymnasium-github.png", +} # -- Generate Changelog ------------------------------------------------- diff --git a/docs/index.md b/docs/index.md index db4cf205a..1e0bad052 100644 --- a/docs/index.md +++ b/docs/index.md @@ -67,7 +67,7 @@ environments/third_party_environments :glob: :caption: Tutorials -tutorials/* +tutorials/**/index ``` ```{toctree} diff --git a/docs/requirements.txt b/docs/requirements.txt index 631b565fb..f9c624c7b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ sphinx sphinx-autobuild myst-parser -sphinx_gallery +git+https://github.com/sphinx-gallery/sphinx-gallery.git@4006662c8c1984453a247dc6d3df6260e5b00f4b#egg=sphinx_gallery git+https://github.com/Farama-Foundation/Celshast#egg=furo moviepy pygame diff --git a/docs/tutorials/README.rst b/docs/tutorials/README.rst new file mode 100644 index 000000000..0c7e28c3b --- /dev/null +++ b/docs/tutorials/README.rst @@ -0,0 +1,2 @@ +Tutorials +========= diff --git a/docs/tutorials/demo.py b/docs/tutorials/demo.py deleted file mode 100644 index 2547025d2..000000000 --- a/docs/tutorials/demo.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Demo tutorial script -========================= - -This file is not listed in the website and serves only to give an example of a tutorial file. And is mostly a copy-paste from sphinx-gallery. -""" - -# %% -# This is a section header -# ------------------------ -# This is the first section! -# The `#%%` signifies to Sphinx-Gallery that this text should be rendered as -# rST and if using one of the above IDE/plugin's, also signifies the start of a -# 'code block'. - -# This line won't be rendered as rST because there's a space after the last block. -myvariable = 2 -print(f"my variable is {myvariable}") -# This is the end of the 'code block' (if using an above IDE). All code within -# this block can be easily executed all at once. - -# %% -# This is another section header -# ------------------------------ -# -# In the built documentation, it will be rendered as rST after the code above! -# This is also another code block. - -print(f"my variable plus 2 is {myvariable + 2}") diff --git a/docs/tutorials/gymnasium_basics/README.rst b/docs/tutorials/gymnasium_basics/README.rst new file mode 100644 index 000000000..29c32e7bb --- /dev/null +++ b/docs/tutorials/gymnasium_basics/README.rst @@ -0,0 +1,2 @@ +Gymnasium Basics +---------------- diff --git a/docs/tutorials/environment_creation.py b/docs/tutorials/gymnasium_basics/environment_creation.py similarity index 100% rename from docs/tutorials/environment_creation.py rename to docs/tutorials/gymnasium_basics/environment_creation.py diff --git a/docs/tutorials/handling_time_limits.py b/docs/tutorials/gymnasium_basics/handling_time_limits.py similarity index 100% rename from docs/tutorials/handling_time_limits.py rename to docs/tutorials/gymnasium_basics/handling_time_limits.py diff --git a/docs/tutorials/implementing_custom_wrappers.py b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py similarity index 100% rename from docs/tutorials/implementing_custom_wrappers.py rename to docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py diff --git a/docs/tutorials/vector_envs_tutorial.py b/docs/tutorials/gymnasium_basics/vector_envs_tutorial.py similarity index 98% rename from docs/tutorials/vector_envs_tutorial.py rename to docs/tutorials/gymnasium_basics/vector_envs_tutorial.py index 597969cdd..c62512d6b 100644 --- a/docs/tutorials/vector_envs_tutorial.py +++ b/docs/tutorials/gymnasium_basics/vector_envs_tutorial.py @@ -1,13 +1,13 @@ """ Training A2C with Vector Envs and Domain Randomization -================================= +====================================================== """ # %% # Introduction -# ------------------------------ +# ------------ # # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent. # We are going to use A2C, which is the synchronous version of the A3C algorithm [1]. @@ -56,7 +56,7 @@ # %% # Advantage Actor-Critic (A2C) -# ------------------------------ +# ---------------------------- # # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks: # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions. @@ -241,7 +241,7 @@ def update_parameters( # %% # Using Vectorized Environments -# ------------------------------ +# ----------------------------- # # When you calculate the losses for the two Neural Networks over only one epoch, it might have a high variance. With vectorized environments, # we can play with `n_envs` in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker) @@ -259,7 +259,7 @@ def update_parameters( # %% # Domain Randomization -# ------------------------------ +# -------------------- # # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment # and theirfore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them. @@ -337,7 +337,7 @@ def update_parameters( # %% # Setup -# ------------------------------ +# ----- # # environment hyperparams @@ -398,7 +398,7 @@ def update_parameters( # %% # Training the A2C Agent -# ------------------------------ +# ---------------------- # # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns and we are also saving # the losses and entropies to plot them after the agent finished training. @@ -478,7 +478,7 @@ def update_parameters( # %% # Plotting -# ------------------------------ +# -------- # """ plot the results """ @@ -550,7 +550,7 @@ def update_parameters( # %% # Performance Analysis of Synchronous and Asynchronous Vectorized Environments -# ------------------------------ +# ---------------------------------------------------------------------------- # # %% @@ -608,7 +608,7 @@ def update_parameters( # %% # Saving/ Loading Weights -# ------------------------------ +# ----------------------- # save_weights = False @@ -638,7 +638,7 @@ def update_parameters( # %% # Showcase the Agent -# ------------------------------ +# ------------------ # """ play a couple of showcase episodes """ @@ -690,7 +690,7 @@ def update_parameters( # %% # Try playing the environment yourself -# ------------------------------ +# ------------------------------------ # # from gymnasium.utils.play import play @@ -701,7 +701,7 @@ def update_parameters( # %% # References -# ------------------------------ +# ---------- # # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016). # diff --git a/docs/tutorials/training_agents/README.rst b/docs/tutorials/training_agents/README.rst new file mode 100644 index 000000000..b203cce35 --- /dev/null +++ b/docs/tutorials/training_agents/README.rst @@ -0,0 +1,2 @@ +Training Agents +--------------- diff --git a/docs/tutorials/blackjack_tutorial.py b/docs/tutorials/training_agents/blackjack_tutorial.py similarity index 100% rename from docs/tutorials/blackjack_tutorial.py rename to docs/tutorials/training_agents/blackjack_tutorial.py diff --git a/docs/tutorials/reinforce_invpend_gym_v26.py b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py similarity index 100% rename from docs/tutorials/reinforce_invpend_gym_v26.py rename to docs/tutorials/training_agents/reinforce_invpend_gym_v26.py