diff --git a/docs/tutorials/training_agents/blackjack_tutorial.py b/docs/tutorials/training_agents/blackjack_tutorial.py index e91b951b3..31495887d 100644 --- a/docs/tutorials/training_agents/blackjack_tutorial.py +++ b/docs/tutorials/training_agents/blackjack_tutorial.py @@ -299,6 +299,7 @@ def decay_epsilon(self): rolling_length = 500 fig, axs = plt.subplots(ncols=3, figsize=(12, 5)) axs[0].set_title("Episode rewards") +# compute a rolling average of the episode returns (window of rolling_length) to give a smoother graph reward_moving_average = ( np.convolve( np.array(env.return_queue).flatten(), np.ones(rolling_length), mode="valid"