kscalelabs · WT-MM · Jan 17, 2025 · Jan 21, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/examples/new.kinfer b/examples/new.kinfer
diff --git a/examples/randomization.kinfer b/examples/randomization.kinfer
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
@@ -14,14 +14,20 @@
 from sim.envs.humanoids.g1_env import G1FreeEnv
 from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
 from sim.envs.humanoids.gpr_env import GprFreeEnv
+from sim.envs.humanoids.gpr_headless_config import GprHeadlessCfg, GprHeadlessCfgPPO
+from sim.envs.humanoids.gpr_headless_env import GprHeadlessEnv
+from sim.envs.humanoids.gpr_latency_config import (
+    GprLatencyCfg,
+    GprLatencyCfgPPO,
+    GprLatencyStandingCfg,
+)
+from sim.envs.humanoids.gpr_latency_env import GprLatencyEnv
 from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
 from sim.envs.humanoids.h1_env import H1FreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
 from sim.envs.humanoids.xbot_env import XBotLFreeEnv
 from sim.envs.humanoids.zbot2_config import ZBot2Cfg, ZBot2CfgPPO, ZBot2StandingCfg
 from sim.envs.humanoids.zbot2_env import ZBot2Env
-from sim.envs.humanoids.zeroth_config import ZerothCfg, ZerothCfgPPO
-from sim.envs.humanoids.zeroth_env import ZerothEnv
 from sim.utils.task_registry import TaskRegistry  # noqa: E402
 
 task_registry = TaskRegistry()
@@ -33,4 +39,6 @@
 task_registry.register("XBotL_free", XBotLFreeEnv, XBotCfg(), XBotCfgPPO())
 task_registry.register("zbot2", ZBot2Env, ZBot2Cfg(), ZBot2CfgPPO())
 task_registry.register("zbot2_standing", ZBot2Env, ZBot2StandingCfg(), ZBot2CfgPPO())
-task_registry.register("zeroth", ZerothEnv, ZerothCfg(), ZerothCfgPPO())
+task_registry.register("gpr_headless", GprHeadlessEnv, GprHeadlessCfg(), GprHeadlessCfgPPO())
+task_registry.register("gpr_latency", GprLatencyEnv, GprLatencyCfg(), GprLatencyCfgPPO())
+task_registry.register("gpr_latency_standing", GprLatencyEnv, GprLatencyStandingCfg(), GprLatencyCfgPPO())
diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
@@ -17,9 +17,9 @@ class GprCfg(LeggedRobotCfg):
 
     class env(LeggedRobotCfg.env):
         # change the observation dim
-        frame_stack = 15
-        c_frame_stack = 3
-        num_single_obs = 11 + NUM_JOINTS * 3
+        frame_stack = 15  # Actor
+        c_frame_stack = 3  # Critic
+        num_single_obs = 8 + NUM_JOINTS * 3 + 3  # Add ang vel
         num_observations = int(frame_stack * num_single_obs)
         single_num_privileged_obs = 25 + NUM_JOINTS * 4
         num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
@@ -28,93 +28,6 @@ class env(LeggedRobotCfg.env):
         episode_length_s = 24  # episode length in seconds
         use_ref_actions = False
 
-        # input_schema = P.IOSchema(
-        #     values=[
-        #         P.ValueSchema(
-        #             value_name="vector_command",
-        #             vector_command=P.VectorCommandSchema(
-        #                 dimensions=3,  # x_vel, y_vel, rot
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="timestamp",
-        #             timestamp=P.TimestampSchema(
-        #                 start_seconds=0,
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="dof_pos",
-        #             joint_positions=P.JointPositionsSchema(
-        #                 joint_names=Robot.joint_names(),
-        #                 unit=P.JointPositionUnit.RADIANS,
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="dof_vel",
-        #             joint_velocities=P.JointVelocitiesSchema(
-        #                 joint_names=Robot.joint_names(),
-        #                 unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="prev_actions",
-        #             joint_positions=P.JointPositionsSchema(
-        #                 joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
-        #             ),
-        #         ),
-        #         # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
-        #         P.ValueSchema(
-        #             value_name="imu_ang_vel",
-        #             imu=P.ImuSchema(
-        #                 use_accelerometer=False,
-        #                 use_gyroscope=True,
-        #                 use_magnetometer=False,
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="imu_euler_xyz",
-        #             imu=P.ImuSchema(
-        #                 use_accelerometer=True,
-        #                 use_gyroscope=False,
-        #                 use_magnetometer=False,
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="hist_obs",
-        #             state_tensor=P.StateTensorSchema(
-        #                 # 11 is the number of single observation features - 6 from IMU, 5 from command input
-        #                 # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
-        #                 shape=[frame_stack * (11 + NUM_JOINTS * 3)],
-        #                 dtype=P.DType.FP32,
-        #             ),
-        #         ),
-        #     ]
-        # )
-
-        # output_schema = P.IOSchema(
-        #     values=[
-        #         P.ValueSchema(
-        #             value_name="actions",
-        #             joint_positions=P.JointPositionsSchema(
-        #                 joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="actions_raw",
-        #             joint_positions=P.JointPositionsSchema(
-        #                 joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
-        #             ),
-        #         ),
-        #         P.ValueSchema(
-        #             value_name="new_x",
-        #             state_tensor=P.StateTensorSchema(
-        #                 shape=[frame_stack * (11 + NUM_JOINTS * 3)],
-        #                 dtype=P.DType.FP32,
-        #             ),
-        #         ),
-        #     ]
-        # )
-
     class safety(LeggedRobotCfg.safety):
         # safety factors
         pos_limit = 1.0
@@ -128,7 +41,7 @@ class asset(LeggedRobotCfg.asset):
 
         foot_name = ["foot1", "foot3"]
         knee_name = ["leg3_shell2", "leg3_shell22"]
-        imu_name = "imu_link"
+        imu_name = "imu"
 
         termination_height = 0.2
         default_feet_height = 0.0
@@ -140,8 +53,8 @@ class asset(LeggedRobotCfg.asset):
         fix_base_link = False
 
     class terrain(LeggedRobotCfg.terrain):
-        # mesh_type = "plane"
-        mesh_type = "trimesh"
+        mesh_type = "plane"
+        # mesh_type = "trimesh"
         curriculum = False
         # rough terrain only:
         measure_heights = False
@@ -168,10 +81,6 @@ class noise_scales:
             quat = 0.03
             height_measurements = 0.1
 
-        # Currently unused
-        # class noise_ranges:
-        #     default_pos = 0.03  # +- 0.05 rad
-
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]
         rot = Robot.rotation
@@ -223,6 +132,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         # dynamic randomization
         action_noise = 0.02
         action_delay = 0.5
+        randomize_pd_gains = False
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -231,9 +141,9 @@ class commands(LeggedRobotCfg.commands):
         heading_command = True  # if true: compute ang vel command from heading error
 
         class ranges:
-            lin_vel_x = [-0.3, 0.6]  # min max [m/s]
-            lin_vel_y = [-0.3, 0.3]  # min max [m/s]
-            ang_vel_yaw = [-0.3, 0.3]  # min max [rad/s]
+            lin_vel_x = [-0.5, 1.0]  # min max [m/s]
+            lin_vel_y = [-0.5, 0.5]  # min max [m/s]
+            ang_vel_yaw = [-1.5, 1.5]  # min max [rad/s]
             heading = [-3.14, 3.14]
 
     class rewards:
@@ -242,8 +152,8 @@ class rewards:
         min_dist = 0.2
         max_dist = 0.5
         # put some settings here for LLM parameter tuning
-        target_joint_pos_scale = 0.17  # rad
-        target_feet_height = 0.05  # m
+        target_joint_pos_scale = 0.24  # rad
+        target_feet_height = 0.06  # m
 
         cycle_time = 0.4  # sec
         # if true negative total rewards are clipped at zero (avoids early termination problems)
@@ -316,8 +226,8 @@ class rewards:
         max_dist = 0.5
         # put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.17  # rad
-        target_feet_height = 0.05  # m
-        cycle_time = 0.5  # sec
+        target_feet_height = 0.06  # m
+        cycle_time = 0.4  # sec
         # if true negative total rewards are clipped at zero (avoids early termination problems)
         only_positive_rewards = False
         # tracking reward = exp(error*sigma)

diff --git a/sim/envs/humanoids/gpr_env.py b/sim/envs/humanoids/gpr_env.py
@@ -124,15 +124,17 @@ def compute_ref_state(self):
         scale_2 = 2 * scale_1
         # left foot stance phase set to default joint pos
         sin_pos_l[sin_pos_l > 0] = 0
-        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
+
+        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1
         self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
-        self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1
+        self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 * -1  # I negated this
 
         # right foot stance phase set to default joint pos
         sin_pos_r[sin_pos_r < 0] = 0
-        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
-        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
-        self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1
+        # pfb30 last one should be -1
+        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1
+        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 * -1  # I negated this
+        self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += -sin_pos_r * scale_1 * -1  # I negated this
 
         # Double support phase
         self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
@@ -180,11 +182,11 @@ def _get_noise_scale_vec(self, cfg):
         noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel
         noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0  # previous actions
         noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = (
+            noise_scales.quat * self.obs_scales.quat
+        )  # projected_gravity
+        noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5) + 6] = (
             noise_scales.ang_vel * self.obs_scales.ang_vel
         )  # ang vel
-        noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = (
-            noise_scales.quat * self.obs_scales.quat
-        )  # euler x,y
         return noise_vec
 
     def compute_observations(self):
@@ -228,8 +230,8 @@ def compute_observations(self):
                 q,  # 12D
                 dq,  # 12D
                 self.actions,  # 12D
+                self.projected_gravity,  # 3
                 self.base_ang_vel * self.obs_scales.ang_vel,  # 3
-                self.base_euler_xyz * self.obs_scales.quat,  # 3
             ),
             dim=-1,
         )