Commit 4e6dc3e: Tutorials galleries (openai#258)

mgoulao authored Jan 11, 2023
1 parent 35fe9b0 commit 4e6dc3e

Showing 14 changed files with 54 additions and 53 deletions.

9 changes: 6 additions & 3 deletions docs/.gitignore

@@ -4,9 +4,12 @@ __pycache__
 build/
 _build/

-tutorials/*
-!tutorials/*.md
-!tutorials/*.py
+tutorials/**/*.pickle
+tutorials/**/images/
+tutorials/**/*.rst
+tutorials/**/*.ipynb
+tutorials/**/*.zip
+!tutorials/**/README.rst

 environments/**/list.html
 environments/**/complete_list.html

33 changes: 27 additions & 6 deletions docs/conf.py

@@ -16,9 +16,10 @@

 # -- Project information -----------------------------------------------------
 import os
+import re
 from typing import Any, Dict

-from furo import gen_tutorials
+import sphinx_gallery.gen_rst

 import gymnasium

@@ -43,6 +44,7 @@
     "sphinx.ext.githubpages",
     "myst_parser",
     "furo.gen_tutorials",
+    "sphinx_gallery.gen_gallery",
     "sphinx_github_changelog",
 ]

@@ -52,7 +54,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["tutorials/demo.rst"]
+exclude_patterns = ["tutorials/README.rst"]

 # Napoleon settings
 napoleon_use_ivar = True

@@ -95,10 +97,29 @@

 # -- Generate Tutorials -------------------------------------------------

-gen_tutorials.generate(
-    os.path.dirname(__file__),
-    os.path.join(os.path.dirname(__file__), "tutorials"),
-)
+sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
+.. DO NOT EDIT.
+.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
+.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
+.. "{0}"
+.. LINE NUMBERS ARE GIVEN BELOW.
+
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_{1}:
+
+"""
+
+sphinx_gallery_conf = {
+    "ignore_pattern": r"__init__\.py",
+    "examples_dirs": "./tutorials",
+    "gallery_dirs": "./tutorials",
+    "show_signature": False,
+    "show_memory": False,
+    "min_reported_time": float("inf"),
+    "filename_pattern": f"{re.escape(os.sep)}run_",
+    "default_thumb_file": "_static/img/gymnasium-github.png",
+}

 # -- Generate Changelog -------------------------------------------------

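With this configuration, every Python file under docs/tutorials becomes a gallery page, while the filename_pattern restricts which scripts sphinx-gallery actually executes during the build to those whose names start with run_; the rest are rendered without running. As a rough sketch of the format the gallery expects (a hypothetical file, not part of this commit):

"""
Demo Tutorial
=============

The module docstring opens the page: its first line is the title,
underlined in rST style, and this paragraph becomes the intro text.
"""
# %%
# A Section
# ---------
#
# Text cells are rST comments following a ``# %%`` marker; plain code
# in between is rendered (and, for ``run_*`` files, executed) as code cells.

import gymnasium as gym

env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42)
env.close()
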
2 changes: 1 addition & 1 deletion docs/index.md

@@ -67,7 +67,7 @@ environments/third_party_environments
 :glob:
 :caption: Tutorials
-tutorials/*
+tutorials/**/index
 ```

 ```{toctree}

2 changes: 1 addition & 1 deletion docs/requirements.txt

@@ -1,7 +1,7 @@
 sphinx
 sphinx-autobuild
 myst-parser
-sphinx_gallery
+git+https://github.com/sphinx-gallery/sphinx-gallery.git@4006662c8c1984453a247dc6d3df6260e5b00f4b#egg=sphinx_gallery
 git+https://github.com/Farama-Foundation/Celshast#egg=furo
 moviepy
 pygame

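This swaps the PyPI release of sphinx-gallery for a pin to a specific upstream commit, using pip's standard VCS-requirement syntax (git+URL@commit#egg=name), presumably to pick up changes that had not yet been released.
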
2 changes: 2 additions & 0 deletions docs/tutorials/README.rst

@@ -0,0 +1,2 @@
+Tutorials
+=========

29 changes: 0 additions & 29 deletions docs/tutorials/demo.py

This file was deleted.

2 changes: 2 additions & 0 deletions docs/tutorials/gymnasium_basics/README.rst

@@ -0,0 +1,2 @@
+Gymnasium Basics
+----------------

File renamed without changes.
File renamed without changes.

@@ -1,13 +1,13 @@
 """
 Training A2C with Vector Envs and Domain Randomization
-=================================
+======================================================
 """


 # %%
 # Introduction
-# ------------------------------
+# ------------
 #
 # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
 # We are going to use A2C, which is the synchronous version of the A3C algorithm [1].

@@ -56,7 +56,7 @@

 # %%
 # Advantage Actor-Critic (A2C)
-# ------------------------------
+# ----------------------------
 #
 # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
 # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.

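A hedged sketch of that two-network layout (illustrative sizes only; the tutorial's own classes and hyperparameters differ):

import torch
import torch.nn as nn

n_features, n_actions, hidden = 8, 4, 64  # hypothetical dimensions

# Critic: maps a state to a scalar estimate of the state value V(s).
critic = nn.Sequential(
    nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, 1)
)

# Actor: maps a state to logits over the discrete actions.
actor = nn.Sequential(
    nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, n_actions)
)

state = torch.zeros(1, n_features)  # dummy input for illustration
dist = torch.distributions.Categorical(logits=actor(state))
action = dist.sample()   # sampled action
value = critic(state)    # V(s) estimate
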
@@ -241,7 +241,7 @@ def update_parameters(

 # %%
 # Using Vectorized Environments
-# ------------------------------
+# -----------------------------
 #
 # When you calculate the losses for the two neural networks over only one epoch, they might have high variance. With vectorized environments,
 # we can run `n_envs` environments in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)

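A hedged sketch of creating and stepping vectorized environments, assuming the gymnasium vector API of this era (the tutorial's own setup differs; LunarLander-v2 also needs the box2d extra):

import gymnasium as gym

# Three copies of the environment stepping in lockstep; one call to
# step() advances all of them and returns batched results.
envs = gym.vector.make("LunarLander-v2", num_envs=3)
observations, infos = envs.reset(seed=42)
observations, rewards, terminations, truncations, infos = envs.step(
    envs.action_space.sample()  # one action per sub-environment
)
envs.close()
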
@@ -259,7 +259,7 @@ def update_parameters(

 # %%
 # Domain Randomization
-# ------------------------------
+# --------------------
 #
 # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
 # and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.

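A hedged sketch of that idea, assuming LunarLander-v2's gravity and wind constructor parameters (the tutorial randomizes comparable physics settings):

import numpy as np
import gymnasium as gym

rng = np.random.default_rng(seed=42)
gravities = rng.uniform(-11.99, -0.01, size=3)  # LunarLander accepts gravity in (-12, 0)
wind_powers = rng.uniform(0.0, 19.99, size=3)   # and wind_power in [0, 20)

# Each sub-environment gets its own randomly drawn physics, so the agent
# never trains against a single fixed parameterization.
envs = gym.vector.AsyncVectorEnv(
    [
        lambda g=g, w=w: gym.make(
            "LunarLander-v2", gravity=g, enable_wind=True, wind_power=w
        )
        for g, w in zip(gravities, wind_powers)
    ]
)
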
@@ -337,7 +337,7 @@ def update_parameters(

 # %%
 # Setup
-# ------------------------------
+# -----
 #

 # environment hyperparams

@@ -398,7 +398,7 @@ def update_parameters(

 # %%
 # Training the A2C Agent
-# ------------------------------
+# ----------------------
 #
 # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns, and we are also saving
 # the losses and entropies to plot them after the agent has finished training.

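A hedged sketch of the wrapper usage, assuming the gymnasium 0.27-era API in which `RecordEpisodeStatistics` also accepts vector environments (the tutorial wraps its own envs similarly):

import gymnasium as gym

# Keeps a rolling record of the last `deque_size` episode returns and
# lengths; when an episode ends they surface under info["episode"].
envs = gym.wrappers.RecordEpisodeStatistics(
    gym.vector.make("LunarLander-v2", num_envs=3), deque_size=100
)
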
@@ -478,7 +478,7 @@ def update_parameters(

 # %%
 # Plotting
-# ------------------------------
+# --------
 #

 """ plot the results """

@@ -550,7 +550,7 @@ def update_parameters(

 # %%
 # Performance Analysis of Synchronous and Asynchronous Vectorized Environments
-# ------------------------------
+# ----------------------------------------------------------------------------
 #

 # %%

@@ -608,7 +608,7 @@ def update_parameters(

 # %%
 # Saving/ Loading Weights
-# ------------------------------
+# -----------------------
 #

 save_weights = False

@@ -638,7 +638,7 @@ def update_parameters(

 # %%
 # Showcase the Agent
-# ------------------------------
+# ------------------
 #

 """ play a couple of showcase episodes """

@@ -690,7 +690,7 @@ def update_parameters(

 # %%
 # Try playing the environment yourself
-# ------------------------------
+# ------------------------------------
 #

 # from gymnasium.utils.play import play

@@ -701,7 +701,7 @@ def update_parameters(

 # %%
 # References
-# ------------------------------
+# ----------
 #
 # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
 #

2 changes: 2 additions & 0 deletions docs/tutorials/training_agents/README.rst

@@ -0,0 +1,2 @@
+Training Agents
+---------------

File renamed without changes.
File renamed without changes.
