From c11ac0501594fb2b5bf3b932698f2b4085bf7414 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Thu, 2 Jan 2025 00:48:30 +0200
Subject: [PATCH] Minor Documentation improvements in `HumanoidStandup` (#1284)

---
 gymnasium/envs/mujoco/humanoidstandup_v5.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py
index 3a0e54228..072128817 100644
--- a/gymnasium/envs/mujoco/humanoidstandup_v5.py
+++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py
@@ -195,11 +195,11 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
     A reward for moving up (trying to stand up).
     This is not a relative reward, measuring how far up the robot has moved since the last timestep,
     but an absolute reward measuring how far up the Humanoid has moved up in total.
-    It is measured as $w{uph} \times (z_{after action} - 0)/dt$,
-    where $z_{after action}$ is the z coordinate of the torso after taking an action,
+    It is measured as $w_{uph} \times \frac{z_{after\_action} - 0}{dt}$,
+    where $z_{after\_action}$ is the z coordinate of the torso after taking an action,
     and $dt$ is the time between actions, which depends on the `frame_skip` parameter (default is $5$),
     and `frametime`, which is $0.01$ - so the default is $dt = 5 \times 0.01 = 0.05$,
-    and $w_{uph}$ is `uph_cost_weight`.
+    and $w_{uph}$ is `uph_cost_weight` (default is $1$).
     - *quad_ctrl_cost*:
     A negative reward to penalize the Humanoid for taking actions that are too large.
     $w_{quad\_control} \times \|action\|_2^2$,