Commit 4e6dc3e: Tutorials galleries (openai#258)

mgoulao authored Jan 11, 2023
1 parent 35fe9b0 commit 4e6dc3e

Showing 14 changed files with 54 additions and 53 deletions.

9 changes: 6 additions & 3 deletions docs/.gitignore

@@ -4,9 +4,12 @@ __pycache__
 build/
 _build/

-tutorials/*
-!tutorials/*.md
-!tutorials/*.py
+tutorials/**/*.pickle
+tutorials/**/images/
+tutorials/**/*.rst
+tutorials/**/*.ipynb
+tutorials/**/*.zip
+!tutorials/**/README.rst

 environments/**/list.html
 environments/**/complete_list.html

33 changes: 27 additions & 6 deletions docs/conf.py

@@ -16,9 +16,10 @@

 # -- Project information -----------------------------------------------------
 import os
+import re
 from typing import Any, Dict

-from furo import gen_tutorials
+import sphinx_gallery.gen_rst

 import gymnasium

@@ -43,6 +44,7 @@
     "sphinx.ext.githubpages",
     "myst_parser",
     "furo.gen_tutorials",
+    "sphinx_gallery.gen_gallery",
     "sphinx_github_changelog",
 ]

@@ -52,7 +54,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["tutorials/demo.rst"]
+exclude_patterns = ["tutorials/README.rst"]

 # Napoleon settings
 napoleon_use_ivar = True

@@ -95,10 +97,29 @@

 # -- Generate Tutorials -------------------------------------------------

-gen_tutorials.generate(
-    os.path.dirname(__file__),
-    os.path.join(os.path.dirname(__file__), "tutorials"),
-)
+sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
+.. DO NOT EDIT.
+.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
+.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
+.. "{0}"
+.. LINE NUMBERS ARE GIVEN BELOW.
+
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_{1}:
+
+"""
+
+sphinx_gallery_conf = {
+    "ignore_pattern": r"__init__\.py",
+    "examples_dirs": "./tutorials",
+    "gallery_dirs": "./tutorials",
+    "show_signature": False,
+    "show_memory": False,
+    "min_reported_time": float("inf"),
+    "filename_pattern": f"{re.escape(os.sep)}run_",
+    "default_thumb_file": "_static/img/gymnasium-github.png",
+}

 # -- Generate Changelog -------------------------------------------------

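With this configuration, every Python file under docs/tutorials becomes a gallery page, while the filename_pattern restricts which scripts sphinx-gallery actually executes during the build to those whose names start with run_; the rest are rendered without running. As a rough sketch of the format the gallery expects (a hypothetical file, not part of this commit):

"""
Demo Tutorial
=============

The module docstring opens the page: its first line is the title,
underlined in rST style, and this paragraph becomes the intro text.
"""
# %%
# A Section
# ---------
#
# Text cells are rST comments following a ``# %%`` marker; plain code
# in between is rendered (and, for ``run_*`` files, executed) as code cells.

import gymnasium as gym

env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42)
env.close()
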
2 changes: 1 addition & 1 deletion docs/index.md

@@ -67,7 +67,7 @@ environments/third_party_environments
 :glob:
 :caption: Tutorials
-tutorials/*
+tutorials/**/index
 ```

 ```{toctree}

2 changes: 1 addition & 1 deletion docs/requirements.txt

@@ -1,7 +1,7 @@
 sphinx
 sphinx-autobuild
 myst-parser
-sphinx_gallery
+git+https://github.com/sphinx-gallery/sphinx-gallery.git@4006662c8c1984453a247dc6d3df6260e5b00f4b#egg=sphinx_gallery
 git+https://github.com/Farama-Foundation/Celshast#egg=furo
 moviepy
 pygame

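This swaps the PyPI release of sphinx-gallery for a pin to a specific upstream commit, using pip's standard VCS-requirement syntax (git+URL@commit#egg=name), presumably to pick up changes that had not yet been released.
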
2 changes: 2 additions & 0 deletions docs/tutorials/README.rst

@@ -0,0 +1,2 @@
+Tutorials
+=========

29 changes: 0 additions & 29 deletions docs/tutorials/demo.py

This file was deleted.

2 changes: 2 additions & 0 deletions docs/tutorials/gymnasium_basics/README.rst

@@ -0,0 +1,2 @@
+Gymnasium Basics
+----------------

File renamed without changes.
File renamed without changes.

@@ -1,13 +1,13 @@
 """
 Training A2C with Vector Envs and Domain Randomization
-=================================
+======================================================
 """


 # %%
 # Introduction
-# ------------------------------
+# ------------
 #
 # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
 # We are going to use A2C, which is the synchronous version of the A3C algorithm [1].

@@ -56,7 +56,7 @@

 # %%
 # Advantage Actor-Critic (A2C)
-# ------------------------------
+# ----------------------------
 #
 # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
 # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.

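A hedged sketch of that two-network layout (illustrative sizes only; the tutorial's own classes and hyperparameters differ):

import torch
import torch.nn as nn

n_features, n_actions, hidden = 8, 4, 64  # hypothetical dimensions

# Critic: maps a state to a scalar estimate of the state value V(s).
critic = nn.Sequential(
    nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, 1)
)

# Actor: maps a state to logits over the discrete actions.
actor = nn.Sequential(
    nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, n_actions)
)

state = torch.zeros(1, n_features)  # dummy input for illustration
dist = torch.distributions.Categorical(logits=actor(state))
action = dist.sample()   # sampled action
value = critic(state)    # V(s) estimate
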
@@ -241,7 +241,7 @@ def update_parameters(

 # %%
 # Using Vectorized Environments
-# ------------------------------
+# -----------------------------
 #
 # When you calculate the losses for the two neural networks over only one epoch, they might have high variance. With vectorized environments,
 # we can run `n_envs` environments in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)

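A hedged sketch of creating and stepping vectorized environments, assuming the gymnasium vector API of this era (the tutorial's own setup differs; LunarLander-v2 also needs the box2d extra):

import gymnasium as gym

# Three copies of the environment stepping in lockstep; one call to
# step() advances all of them and returns batched results.
envs = gym.vector.make("LunarLander-v2", num_envs=3)
observations, infos = envs.reset(seed=42)
observations, rewards, terminations, truncations, infos = envs.step(
    envs.action_space.sample()  # one action per sub-environment
)
envs.close()
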
@@ -259,7 +259,7 @@ def update_parameters(

 # %%
 # Domain Randomization
-# ------------------------------
+# --------------------
 #
 # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
 # and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.

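A hedged sketch of that idea, assuming LunarLander-v2's gravity and wind constructor parameters (the tutorial randomizes comparable physics settings):

import numpy as np
import gymnasium as gym

rng = np.random.default_rng(seed=42)
gravities = rng.uniform(-11.99, -0.01, size=3)  # LunarLander accepts gravity in (-12, 0)
wind_powers = rng.uniform(0.0, 19.99, size=3)   # and wind_power in [0, 20)

# Each sub-environment gets its own randomly drawn physics, so the agent
# never trains against a single fixed parameterization.
envs = gym.vector.AsyncVectorEnv(
    [
        lambda g=g, w=w: gym.make(
            "LunarLander-v2", gravity=g, enable_wind=True, wind_power=w
        )
        for g, w in zip(gravities, wind_powers)
    ]
)
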
@@ -337,7 +337,7 @@ def update_parameters(

 # %%
 # Setup
-# ------------------------------
+# -----
 #

 # environment hyperparams

@@ -398,7 +398,7 @@ def update_parameters(

 # %%
 # Training the A2C Agent
-# ------------------------------
+# ----------------------
 #
 # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns, and we are also saving
 # the losses and entropies to plot them after the agent has finished training.

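A hedged sketch of the wrapper usage, assuming the gymnasium 0.27-era API in which `RecordEpisodeStatistics` also accepts vector environments (the tutorial wraps its own envs similarly):

import gymnasium as gym

# Keeps a rolling record of the last `deque_size` episode returns and
# lengths; when an episode ends they surface under info["episode"].
envs = gym.wrappers.RecordEpisodeStatistics(
    gym.vector.make("LunarLander-v2", num_envs=3), deque_size=100
)
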
@@ -478,7 +478,7 @@ def update_parameters(

 # %%
 # Plotting
-# ------------------------------
+# --------
 #

 """ plot the results """

@@ -550,7 +550,7 @@ def update_parameters(

 # %%
 # Performance Analysis of Synchronous and Asynchronous Vectorized Environments
-# ------------------------------
+# ----------------------------------------------------------------------------
 #

 # %%

@@ -608,7 +608,7 @@ def update_parameters(

 # %%
 # Saving/ Loading Weights
-# ------------------------------
+# -----------------------
 #

 save_weights = False

@@ -638,7 +638,7 @@ def update_parameters(

 # %%
 # Showcase the Agent
-# ------------------------------
+# ------------------
 #

 """ play a couple of showcase episodes """

@@ -690,7 +690,7 @@ def update_parameters(

 # %%
 # Try playing the environment yourself
-# ------------------------------
+# ------------------------------------
 #

 # from gymnasium.utils.play import play

@@ -701,7 +701,7 @@ def update_parameters(

 # %%
 # References
-# ------------------------------
+# ----------
 #
 # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
 #

2 changes: 2 additions & 0 deletions docs/tutorials/training_agents/README.rst

@@ -0,0 +1,2 @@
+Training Agents
+---------------

File renamed without changes.
File renamed without changes.
