From 85a2215595ff3c5a6b56a29d874a29eb3428ab7a Mon Sep 17 00:00:00 2001
From: Mark Towers <mark.m.towers@gmail.com>
Date: Thu, 14 Mar 2024 15:57:50 +0000
Subject: [PATCH] Update Lunar lander step to match performance with Gymnasium
 (#91)

---
 .../envs/lunar_lander/lunar_lander.py         | 70 +++++++++++++------
 1 file changed, 49 insertions(+), 21 deletions(-)

diff --git a/mo_gymnasium/envs/lunar_lander/lunar_lander.py b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
index 89b20723..c8d94d6a 100644
--- a/mo_gymnasium/envs/lunar_lander/lunar_lander.py
+++ b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
@@ -6,6 +6,7 @@
     FPS,
     LEG_DOWN,
     MAIN_ENGINE_POWER,
+    MAIN_ENGINE_Y_LOCATION,
     SCALE,
     SIDE_ENGINE_AWAY,
     SIDE_ENGINE_HEIGHT,
@@ -46,7 +47,7 @@ def __init__(self, *args, **kwargs):
     def step(self, action):
         assert self.lander is not None
 
-        # Update wind
+        # Update wind and apply to the lander
         assert self.lander is not None, "You forgot to call reset()"
         if self.enable_wind and not (self.legs[0].ground_contact or self.legs[1].ground_contact):
             # the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
@@ -60,12 +61,13 @@ def step(self, action):
 
             # the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
             # which is proven to never be periodic, k = 0.01
-            torque_mag = math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx))) * (
-                self.turbulence_power
+            torque_mag = (
+                math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx)))
+                * self.turbulence_power
             )
             self.torque_idx += 1
             self.lander.ApplyTorque(
-                (torque_mag),
+                torque_mag,
                 True,
             )
 
@@ -74,9 +76,15 @@ def step(self, action):
         else:
             assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid "
 
-        # Engines
+        # Apply Engine Impulses
+
+        # Tip is the (X and Y) components of the rotation of the lander.
         tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+
+        # Side is the (-Y and X) components of the rotation of the lander.
         side = (-tip[1], tip[0])
+
+        # Generate two random numbers between -1/SCALE and 1/SCALE.
         dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
 
         m_power = 0.0
@@ -87,21 +95,29 @@ def step(self, action):
                 assert m_power >= 0.5 and m_power <= 1.0
             else:
                 m_power = 1.0
+
             # 4 is move a bit downwards, +-2 for randomness
-            ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
-            oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
+            # The components of the impulse to be applied by the main engine.
+            ox = tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
+            oy = -tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
+
             impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
-            p = self._create_particle(
-                3.5,  # 3.5 is here to make particle speed adequate
-                impulse_pos[0],
-                impulse_pos[1],
-                m_power,
-            )  # particles are just a decoration
-            p.ApplyLinearImpulse(
-                (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
-                impulse_pos,
-                True,
-            )
+            if self.render_mode is not None:
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+                p = self._create_particle(
+                    3.5,  # 3.5 is here to make particle speed adequate
+                    impulse_pos[0],
+                    impulse_pos[1],
+                    m_power,
+                )
+                p.ApplyLinearImpulse(
+                    (
+                        ox * MAIN_ENGINE_POWER * m_power,
+                        oy * MAIN_ENGINE_POWER * m_power,
+                    ),
+                    impulse_pos,
+                    True,
+                )
             self.lander.ApplyLinearImpulse(
                 (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
                 impulse_pos,
@@ -110,25 +126,36 @@ def step(self, action):
 
         s_power = 0.0
         if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
-            # Orientation engines
+            # Orientation/Side engines
             if self.continuous:
                 direction = np.sign(action[1])
                 s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
                 assert s_power >= 0.5 and s_power <= 1.0
             else:
+                # action = 1 is left, action = 3 is right
                 direction = action - 2
                 s_power = 1.0
+
+            # The components of the impulse to be applied by the side engines.
             ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
             oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
+
+            # The literal 17 used below is presumably meant to be SIDE_ENGINE_HEIGHT;
+            # however, SIDE_ENGINE_HEIGHT is defined as 14.
+            # This causes the position of the thrust on the body of the lander to change, depending on the orientation of the lander.
+            # This in turn results in an orientation dependent torque being applied to the lander.
             impulse_pos = (
                 self.lander.position[0] + ox - tip[0] * 17 / SCALE,
                 self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
             )
             if self.render_mode is not None:
-                # particles are just a decoration, so don't add them when not rendering
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
                 p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
                 p.ApplyLinearImpulse(
-                    (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
+                    (
+                        ox * SIDE_ENGINE_POWER * s_power,
+                        oy * SIDE_ENGINE_POWER * s_power,
+                    ),
                     impulse_pos,
                     True,
                 )
@@ -142,6 +169,7 @@ def step(self, action):
 
         pos = self.lander.position
         vel = self.lander.linearVelocity
+
         state = [
             (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
             (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),