From 85a2215595ff3c5a6b56a29d874a29eb3428ab7a Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 14 Mar 2024 15:57:50 +0000 Subject: [PATCH] Update Lunar lander step to match performance with Gymnasium (#91) --- .../envs/lunar_lander/lunar_lander.py | 70 +++++++++++++------ 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/mo_gymnasium/envs/lunar_lander/lunar_lander.py b/mo_gymnasium/envs/lunar_lander/lunar_lander.py index 89b20723..c8d94d6a 100644 --- a/mo_gymnasium/envs/lunar_lander/lunar_lander.py +++ b/mo_gymnasium/envs/lunar_lander/lunar_lander.py @@ -6,6 +6,7 @@ FPS, LEG_DOWN, MAIN_ENGINE_POWER, + MAIN_ENGINE_Y_LOCATION, SCALE, SIDE_ENGINE_AWAY, SIDE_ENGINE_HEIGHT, @@ -46,7 +47,7 @@ def __init__(self, *args, **kwargs): def step(self, action): assert self.lander is not None - # Update wind + # Update wind and apply to the lander assert self.lander is not None, "You forgot to call reset()" if self.enable_wind and not (self.legs[0].ground_contact or self.legs[1].ground_contact): # the function used for wind is tanh(sin(2 k x) + sin(pi k x)), @@ -60,12 +61,13 @@ def step(self, action): # the function used for torque is tanh(sin(2 k x) + sin(pi k x)), # which is proven to never be periodic, k = 0.01 - torque_mag = math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx))) * ( - self.turbulence_power + torque_mag = ( + math.tanh(math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx))) + * self.turbulence_power ) self.torque_idx += 1 self.lander.ApplyTorque( - (torque_mag), + torque_mag, True, ) @@ -74,9 +76,15 @@ def step(self, action): else: assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid " - # Engines + # Apply Engine Impulses + + # Tip is the (X and Y) components of the rotation of the lander. tip = (math.sin(self.lander.angle), math.cos(self.lander.angle)) + + # Side is the (-Y and X) components of the rotation of the lander. 
side = (-tip[1], tip[0]) + + # Generate two random numbers between -1/SCALE and 1/SCALE. dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)] m_power = 0.0 @@ -87,21 +95,29 @@ def step(self, action): assert m_power >= 0.5 and m_power <= 1.0 else: m_power = 1.0 + # 4 is move a bit downwards, +-2 for randomness - ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1] - oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1] + # The components of the impulse to be applied by the main engine. + ox = tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1] + oy = -tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1] + impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy) - p = self._create_particle( - 3.5, # 3.5 is here to make particle speed adequate - impulse_pos[0], - impulse_pos[1], - m_power, - ) # particles are just a decoration - p.ApplyLinearImpulse( - (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power), - impulse_pos, - True, - ) + if self.render_mode is not None: + # particles are just a decoration, with no impact on the physics, so don't add them when not rendering + p = self._create_particle( + 3.5, # 3.5 is here to make particle speed adequate + impulse_pos[0], + impulse_pos[1], + m_power, + ) + p.ApplyLinearImpulse( + ( + ox * MAIN_ENGINE_POWER * m_power, + oy * MAIN_ENGINE_POWER * m_power, + ), + impulse_pos, + True, + ) self.lander.ApplyLinearImpulse( (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power), impulse_pos, @@ -110,25 +126,36 @@ def step(self, action): s_power = 0.0 if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]): - # Orientation engines + # Orientation/Side engines if self.continuous: direction = np.sign(action[1]) s_power = np.clip(np.abs(action[1]), 0.5, 1.0) assert s_power >= 0.5 and s_power <= 1.0 else: + # action = 1 
is left, action = 3 is right direction = action - 2 s_power = 1.0 + + # The components of the impulse to be applied by the side engines. ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE) oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE) + + # The hard-coded constant 17 is presumably meant to be SIDE_ENGINE_HEIGHT. + # However, SIDE_ENGINE_HEIGHT is defined as 14. + # This causes the position of the thrust on the body of the lander to change, depending on the orientation of the lander. + # This in turn results in an orientation-dependent torque being applied to the lander. impulse_pos = ( self.lander.position[0] + ox - tip[0] * 17 / SCALE, self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE, ) if self.render_mode is not None: - # particles are just a decoration, so don't add them when not rendering + # particles are just a decoration, with no impact on the physics, so don't add them when not rendering p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power) p.ApplyLinearImpulse( - (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power), + ( + ox * SIDE_ENGINE_POWER * s_power, + oy * SIDE_ENGINE_POWER * s_power, + ), impulse_pos, True, ) @@ -142,6 +169,7 @@ def step(self, action): pos = self.lander.position vel = self.lander.linearVelocity + state = [ (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2), (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),