save functioning setup

kscalelabs · Jan 9, 2025 · 895271e · 895271e
1 parent 0d2e078
commit 895271e
Show file tree

Hide file tree

Showing 6 changed files with 29 additions and 45 deletions.
diff --git a/examples/zbot_walking.kinfer b/examples/zbot_walking.kinfer
diff --git a/sim/envs/humanoids/zbot2_config.py b/sim/envs/humanoids/zbot2_config.py
@@ -19,7 +19,8 @@ class env(LeggedRobotCfg.env):
         # change the observation dim
         frame_stack = 15
         c_frame_stack = 3
-        num_single_obs = 11 + NUM_JOINTS * 3
+        # num_single_obs = 11 + NUM_JOINTS * 3
+        num_single_obs = 9 + NUM_JOINTS * 3 # pfb30
         num_observations = int(frame_stack * num_single_obs)
         single_num_privileged_obs = 25 + NUM_JOINTS * 4
         num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)

diff --git a/sim/envs/humanoids/zbot2_env.py b/sim/envs/humanoids/zbot2_env.py
@@ -235,8 +235,10 @@ def compute_observations(self):
                 q,  # 20D
                 dq,  # 20D
                 self.actions,  # 20D
-                self.base_ang_vel * self.obs_scales.ang_vel,  # 3
-                observation_imu,  # 3
+                # pfb30
+                self.base_quat, # 4
+                # self.base_ang_vel * self.obs_scales.ang_vel,  # 3
+                # observation_imu,  # 3
             ),
             dim=-1,
         )

diff --git a/sim/model_export2.py b/sim/model_export2.py
@@ -108,8 +108,9 @@ def __init__(self, policy: nn.Module, cfg: ActorCfg) -> None:
         self.frame_stack = cfg.frame_stack
 
         # 11 is the number of single observation features - 6 from IMU, 5 from command input
+        # 9 is the number of single observation features - 4 from IMU(quat), 5 from command input
         # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
-        self.num_single_obs = 11 + self.num_actions * 3
+        self.num_single_obs = 9 + self.num_actions * 3 # pfb30
         self.num_observations = int(self.frame_stack * self.num_single_obs)
 
         self.policy = policy
@@ -143,8 +144,8 @@ def forward(
         dof_pos: Tensor,  # current angular position of the DoFs relative to default
         dof_vel: Tensor,  # current angular velocity of the DoFs
         prev_actions: Tensor,  # previous actions taken by the model
-        imu_ang_vel: Tensor,  # angular velocity of the IMU
-        imu_euler_xyz: Tensor,  # euler angles of the IMU
+        quat: Tensor,  # quaternion of the IMU
+        # imu_euler_xyz: Tensor,  # euler angles of the IMU
         buffer: Tensor,  # buffer of previous observations
     ) -> Tuple[Tensor, Tensor, Tensor]:
         """Runs the actor model forward pass.
@@ -190,19 +191,20 @@ def forward(
         q = dof_pos * self.dof_pos_scale
         dq = dof_vel * self.dof_vel_scale
 
-        if self.use_projected_gravity:
-            imu_observation = imu_euler_xyz
-        else:
-            imu_observation = imu_euler_xyz * self.quat_scale
+        # if self.use_projected_gravity:
+        #     imu_observation = imu_euler_xyz
+        # else:
+        #     imu_observation = imu_euler_xyz * self.quat_scale
         # Construct new observation
         new_x = torch.cat(
             (
                 command_input,
                 q,
                 dq,
                 prev_actions,
-                imu_ang_vel * self.ang_vel_scale,
-                imu_observation,
+                quat
+                # imu_ang_vel * self.ang_vel_scale,
+                # imu_observation,
             ),
             dim=0,
         )
@@ -248,10 +250,10 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
     dof_pos = torch.randn(a_model.num_actions)
     dof_vel = torch.randn(a_model.num_actions)
     prev_actions = torch.randn(a_model.num_actions)
-    imu_ang_vel = torch.randn(3)
-    imu_euler_xyz = torch.randn(3)
+    quat = torch.randn(4)
+    # imu_euler_xyz = torch.randn(3)
     buffer = a_model.get_init_buffer()
-    input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer)
+    input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, quat, buffer)
 
     jit_model = torch.jit.script(a_model)
 

diff --git a/sim/resources/zbot2/robot_fixed.xml b/sim/resources/zbot2/robot_fixed.xml
@@ -162,7 +162,6 @@
     <motor name="R_Knee_Pitch" joint="R_Knee_Pitch" ctrllimited="true" ctrlrange="-10 10" gear="1" />
     <motor name="R_Ankle_Pitch" joint="R_Ankle_Pitch" ctrllimited="true" ctrlrange="-10 10" gear="1" /> -->
 
-
   </actuator>
 
   <sensor>

diff --git a/sim/sim2sim2.py b/sim/sim2sim2.py
@@ -160,8 +160,9 @@ def run_mujoco(
         "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
         "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
         "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
-        "imu_ang_vel.1": np.zeros(3).astype(np.float32),
-        "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
+        # "imu_ang_vel.1": np.zeros(3).astype(np.float32),
+        # "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
+        "quat.1": np.zeros(4).astype(np.float32),
         "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
     }
 
@@ -191,8 +192,8 @@ def run_mujoco(
         q = q[-model_info["num_actions"] :]
         dq = dq[-model_info["num_actions"] :]
 
-        eu_ang = quaternion_to_euler_array(quat)
-        eu_ang[eu_ang > math.pi] -= 2 * math.pi
+        # eu_ang = quaternion_to_euler_array(quat)
+        # eu_ang[eu_ang > math.pi] -= 2 * math.pi
 
         # eu_ang = np.array([0.0, 0.0, 0.0])
         # omega = np.array([0.0, 0.0, 0.0])
@@ -219,41 +220,20 @@ def run_mujoco(
 
             input_data["prev_actions.1"] = prev_actions.astype(np.float32)
 
-            input_data["imu_ang_vel.1"] = omega.astype(np.float32)
-            input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
+            input_data["quat.1"] = quat.astype(np.float32)
+            # input_data["imu_ang_vel.1"] = omega.astype(np.float32)
+            # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
 
             input_data["buffer.1"] = hist_obs.astype(np.float32)
 
-            print(eu_ang)
+            print(quat)
             policy_output = policy(input_data)
             positions = policy_output["actions_scaled"]
             curr_actions = policy_output["actions"]
             hist_obs = policy_output["x.3"]
 
             target_q = positions
 
-            if log_h5:
-                logger.log_data(
-                    {
-                        "t": np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32),
-                        "2D_command": np.array(
-                            [
-                                np.sin(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
-                                np.cos(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
-                            ],
-                            dtype=np.float32,
-                        ),
-                        "3D_command": np.array([x_vel_cmd, y_vel_cmd, yaw_vel_cmd], dtype=np.float32),
-                        "joint_pos": cur_pos_obs.astype(np.float32),
-                        "joint_vel": cur_vel_obs.astype(np.float32),
-                        "prev_actions": prev_actions.astype(np.float32),
-                        "curr_actions": curr_actions.astype(np.float32),
-                        "ang_vel": omega.astype(np.float32),
-                        "euler_rotation": eu_ang.astype(np.float32),
-                        "buffer": hist_obs.astype(np.float32),
-                    }
-                )
-
             prev_actions = curr_actions
 
         # Generate PD control