Skip to content

Commit

Permalink
Deploying to gh-pages from @ 7e2062d 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
pseudo-rnd-thoughts committed Nov 28, 2024
1 parent 69e7546 commit 20bc05e
Show file tree
Hide file tree
Showing 7 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion main/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 6e61249047292981a84104a53735e4b9
config: 77df1ca8f1dadbd38f8151723b35a772
tags: d77d1c0d9ca2f4c8421862c7c5a0d620
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@
},
"outputs": [],
"source": [
"env = gym.wrappers.RecordEpisodeStatistics(env, buffer_length=n_episodes)\nfor episode in tqdm(range(n_episodes)):\n obs, info = env.reset()\n done = False\n\n # play one episode\n while not done:\n action = agent.get_action(env, obs)\n next_obs, reward, terminated, truncated, info = env.step(action)\n\n # update the agent\n agent.update(obs, action, reward, terminated, next_obs)\n\n # update if the environment is done and the current obs\n done = terminated or truncated\n obs = next_obs\n\n agent.decay_epsilon()"
"env = gym.wrappers.RecordEpisodeStatistics(env, deque_size=n_episodes)\nfor episode in tqdm(range(n_episodes)):\n obs, info = env.reset()\n done = False\n\n # play one episode\n while not done:\n action = agent.get_action(env, obs)\n next_obs, reward, terminated, truncated, info = env.step(action)\n\n # update the agent\n agent.update(obs, action, reward, terminated, next_obs)\n\n # update if the environment is done and the current obs\n done = terminated or truncated\n obs = next_obs\n\n agent.decay_epsilon()"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def decay_epsilon(self):
#


env = gym.wrappers.RecordEpisodeStatistics(env, buffer_length=n_episodes)
env = gym.wrappers.RecordEpisodeStatistics(env, deque_size=n_episodes)
for episode in tqdm(range(n_episodes)):
obs, info = env.reset()
done = False
Expand Down
2 changes: 1 addition & 1 deletion main/searchindex.js

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ <h2>Building an agent<a class="headerlink" href="#building-an-agent" title="Link
<p>Info: The current hyperparameters are set to quickly train a decent agent.
If you want to converge to the optimal policy, try increasing
the n_episodes by 10x and lower the learning_rate (e.g. to 0.001).</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">wrappers</span><span class="o">.</span><span class="n">RecordEpisodeStatistics</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">buffer_length</span><span class="o">=</span><span class="n">n_episodes</span><span class="p">)</span>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">wrappers</span><span class="o">.</span><span class="n">RecordEpisodeStatistics</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">deque_size</span><span class="o">=</span><span class="n">n_episodes</span><span class="p">)</span>
<span class="k">for</span> <span class="n">episode</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">n_episodes</span><span class="p">)):</span>
<span class="n">obs</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
Expand Down

0 comments on commit 20bc05e

Please sign in to comment.