From bafb56fd84d2da56816ac676c18f16bb7d4c9792 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 16:40:28 -0800
Subject: [PATCH 01/43] kinfer module

---
 .gitmodules        | 5 ++++-
 sim/sim2sim.py     | 4 ++++
 third_party/kinfer | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 160000 third_party/kinfer

diff --git a/.gitmodules b/.gitmodules
index 1d08b1ee..4e07ec9f 100755
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "third_party/isaacgym"]
 	path = third_party/isaacgym
-	url = ../../kscalelabs/isaacgym.git
\ No newline at end of file
+	url = ../../kscalelabs/isaacgym.git
+[submodule "third_party/kinfer"]
+	path = third_party/kinfer
+	url = https://github.com/kscalelabs/kinfer.git
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 82ce1137..3a9bb669 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -17,8 +17,12 @@
 import onnxruntime as ort
 import pygame
 import torch
+# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
+import sys
+sys.path.append('third_party/kinfer')
 from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
+
 from scipy.spatial.transform import Rotation as R
 from tqdm import tqdm
 
diff --git a/third_party/kinfer b/third_party/kinfer
new file mode 160000
index 00000000..acfa5f67
--- /dev/null
+++ b/third_party/kinfer
@@ -0,0 +1 @@
+Subproject commit acfa5f67aa26fedfc77c724358d3e4b35df7c365

From faa91193d5d1fe4f5bcff342de0cd8db70ec35ee Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 17:37:50 -0800
Subject: [PATCH 02/43] ppo schema

---
 .gitignore             |   3 +
 sim/play.py            | 135 ++++++++++++++++++++++++++++++++---------
 sim/sim2sim.py         |   5 +-
 sim/utils/resources.py |   9 +++
 4 files changed, 123 insertions(+), 29 deletions(-)
 create mode 100644 sim/utils/resources.py

diff --git a/.gitignore b/.gitignore
index d09d6354..194270e2 100755
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,6 @@ wandb/
 runs/
 isaacgym/
 *.h5
+
+# dev
+ref/
diff --git a/sim/play.py b/sim/play.py
index 04465dd9..c65cfc90 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -18,9 +18,20 @@
 import h5py
 import numpy as np
 from isaacgym import gymapi
-from kinfer.export.pytorch import export_to_onnx
 from tqdm import tqdm
 
+# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
+import sys
+
+# Get absolute path relative to this script
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer'))
+sys.path.append(kinfer_path)
+
+from kinfer import proto as P 
+from kinfer.export.python import export_model
+
+from sim.utils.resources import load_embodiment
+
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
 
@@ -55,7 +66,7 @@ def play(args: argparse.Namespace) -> None:
     else:
         env_cfg.terrain.mesh_type = "plane"
     env_cfg.terrain.num_rows = 5
-    env_cfg.terrain.num_cols = 5
+    env_cfg.terrain.num_cxols = 5
     env_cfg.terrain.curriculum = False
     env_cfg.terrain.max_init_terrain_level = 5
     env_cfg.noise.add_noise = True
@@ -78,40 +89,108 @@ def play(args: argparse.Namespace) -> None:
     ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
     policy = ppo_runner.get_inference_policy(device=env.device)
 
+    embodiment = ppo_runner.cfg["experiment_name"].lower()
+
     # Export policy if needed
     if args.export_policy:
         path = os.path.join(".")
         export_policy_as_jit(ppo_runner.alg.actor_critic, path)
         print("Exported policy as jit script to: ", path)
 
-    # export policy as a onnx module (used to run it on web)
-    if args.export_onnx:
-        path = ppo_runner.load_path
-        embodiment = ppo_runner.cfg["experiment_name"].lower()
-        policy_cfg = ActorCfg(
-            embodiment=embodiment,
-            cycle_time=env_cfg.rewards.cycle_time,
-            sim_dt=env_cfg.sim.dt,
-            sim_decimation=env_cfg.control.decimation,
-            tau_factor=env_cfg.safety.torque_limit,
-            action_scale=env_cfg.control.action_scale,
-            lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
-            ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
-            quat_scale=env_cfg.normalization.obs_scales.quat,
-            dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
-            dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
-            frame_stack=env_cfg.env.frame_stack,
-            clip_observations=env_cfg.normalization.clip_observations,
-            clip_actions=env_cfg.normalization.clip_actions,
-        )
-        actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)
-
-        # Merge policy_cfg and sim2sim_info into a single config object
-        export_config = {**vars(policy_cfg), **sim2sim_info}
+    # load the embodiment
+    robot = load_embodiment(embodiment)
+    JOINT_NAMES = robot.Robot.all_joints()
+
+    input_schema = P.IOSchema(
+        values=[
+            P.ValueSchema(
+                value_name="observation_buffer",
+                state_tensor=P.StateTensorSchema(
+                    # 11 is the number of single observation features - 6 from IMU, 5 from command input
+                    # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
+                    # -1 comes from the fact that the latest observation is the current state and not part of the buffer
+                    shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)],
+                    description="Buffer of previous observations"
+                )
+            ),
+            P.ValueSchema(
+                value_name="vector_command",
+                vector_command=P.VectorCommandSchema(
+                    dimensions=3,  # x_vel, y_vel, rot
+                )
+            ),
+            P.ValueSchema(
+                value_name="timestamp",
+                timestamp=P.TimestampSchema(
+                    unit=P.TimestampUnit.SECONDS,
+                    description="Current policy time in seconds"
+                )
+            ),
+            P.ValueSchema(
+                value_name="joint_positions",
+                joint_positions=P.JointPositionsSchema(
+                    joint_names=JOINT_NAMES,
+                    unit=P.JointPositionUnit.RADIANS,
+                )
+            ),
+            P.ValueSchema(
+                value_name="joint_velocities", 
+                joint_velocities=P.JointVelocitiesSchema(
+                    joint_names=JOINT_NAMES,
+                    unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
+                )
+            ),
+            P.ValueSchema(
+                value_name="previous_actions",
+                joint_positions=P.JointPositionsSchema(
+                    joint_names=JOINT_NAMES,
+                    unit=P.JointPositionUnit.RADIANS
+                )
+            ),
+            # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
+            P.ValueSchema(
+                value_name="imu_angular_velocity",
+                imu=P.ImuSchema(
+                    use_accelerometer=False,
+                    use_gyroscope=True,
+                    use_magnetometer=False,
+                )
+            ),
+            P.ValueSchema(
+                value_name="imu_orientation",
+                imu=P.ImuSchema(
+                    use_accelerometer=True,
+                    use_gyroscope=False,
+                    use_magnetometer=False,
+                )
+            )
+        ]
+    )
+
+    output_schema = P.IOSchema(
+        values=[
+            P.ValueSchema(
+                value_name="joint_positions",
+                joint_pos=P.JointPosSchema(
+                    joint_names=JOINT_NAMES,
+                    unit=P.JointPosUnit.RADIANS,
+                ),
+            )
+        ]
+    )
 
-        export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_policy.onnx")
-        print("Exported policy as kinfer-compatible onnx to: ", path)
+    # Create the full model schema
+    model_schema = P.ModelSchema(
+        input_schema=input_schema,
+        output_schema=output_schema
+    )
 
+    if args.export_onnx:
+        kinfer_policy = export_model(
+            model=policy,
+            schema=model_schema,
+        )
+        kinfer_policy.save_model("policy.kinfer")
     # Prepare for logging
     env_logger = Logger(env.dt)
     robot_index = 0
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 3a9bb669..f47da5de 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -19,7 +19,10 @@
 import torch
 # Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
 import sys
-sys.path.append('third_party/kinfer')
+
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer'))
+sys.path.append(kinfer_path)
+
 from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
 
diff --git a/sim/utils/resources.py b/sim/utils/resources.py
new file mode 100644
index 00000000..84aafb80
--- /dev/null
+++ b/sim/utils/resources.py
@@ -0,0 +1,9 @@
+import importlib
+from typing import Any
+
+def load_embodiment(embodiment: str) -> Any:  # noqa: ANN401
+    # Dynamically import embodiment
+    module_name = f"sim.resources.{embodiment}.joints"
+    module = importlib.import_module(module_name)
+    robot = getattr(module, "Robot")
+    return robot

From 6e4988da3ccca8afe2445966d434cf89879f170f Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 17:41:00 -0800
Subject: [PATCH 03/43] fix typo

---
 sim/play.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sim/play.py b/sim/play.py
index c65cfc90..c1faac16 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -66,7 +66,7 @@ def play(args: argparse.Namespace) -> None:
     else:
         env_cfg.terrain.mesh_type = "plane"
     env_cfg.terrain.num_rows = 5
-    env_cfg.terrain.num_cxols = 5
+    env_cfg.terrain.num_cols = 5
     env_cfg.terrain.curriculum = False
     env_cfg.terrain.max_init_terrain_level = 5
     env_cfg.noise.add_noise = True

From 983e010d542502e5b8eb0d2be1921c46ad2781d7 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 18:12:59 -0800
Subject: [PATCH 04/43] get jit

---
 sim/play.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index c1faac16..68d12d6c 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -48,10 +48,13 @@
 def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
     os.makedirs(path, exist_ok=True)
     path = os.path.join(path, "policy_1.pt")
+    model = get_actor_jit(actor_critic)
+    model.save(path)
+
+def get_actor_jit(actor_critic: Any) -> Any:
     model = copy.deepcopy(actor_critic.actor).to("cpu")
     traced_script_module = torch.jit.script(model)
-    traced_script_module.save(path)
-
+    return traced_script_module
 
 def play(args: argparse.Namespace) -> None:
     logger.info("Configuring environment and training settings...")
@@ -186,8 +189,9 @@ def play(args: argparse.Namespace) -> None:
     )
 
     if args.export_onnx:
+        jit_policy = get_actor_jit(policy)
         kinfer_policy = export_model(
-            model=policy,
+            model=jit_policy,
             schema=model_schema,
         )
         kinfer_policy.save_model("policy.kinfer")

From b41d3df9064a009c6de0c917ddc298f736a43759 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 18:26:57 -0800
Subject: [PATCH 05/43] save

---
 sim/play.py          | 3 ++-
 sim/requirements.txt | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index 68d12d6c..56e86633 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -19,6 +19,7 @@
 import numpy as np
 from isaacgym import gymapi
 from tqdm import tqdm
+import onnx
 
 # Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
 import sys
@@ -194,7 +195,7 @@ def play(args: argparse.Namespace) -> None:
             model=jit_policy,
             schema=model_schema,
         )
-        kinfer_policy.save_model("policy.kinfer")
+        onnx.save(kinfer_policy, "policy.kinfer")
     # Prepare for logging
     env_logger = Logger(env.dt)
     robot_index = 0
diff --git a/sim/requirements.txt b/sim/requirements.txt
index fbbdc4e6..ffac4a26 100755
--- a/sim/requirements.txt
+++ b/sim/requirements.txt
@@ -10,6 +10,7 @@ numpy==1.21.6
 wandb
 tensorboard==2.14.0
 onnxscript
+onnx
 mujoco==2.3.6
 kinfer==0.0.5
-opencv-python
\ No newline at end of file
+opencv-python

From f627fb3ff327e54d26a99f353c6fd712667b68b9 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 18:27:59 -0800
Subject: [PATCH 06/43] lint

---
 sim/play.py            | 50 ++++++++++++++++++------------------------
 sim/sim2sim.py         |  8 +++----
 sim/utils/resources.py |  1 +
 3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index 56e86633..d5b726bf 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -9,6 +9,9 @@
 import logging
 import math
 import os
+
+# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
+import sys
 import time
 import uuid
 from datetime import datetime
@@ -17,22 +20,17 @@
 import cv2
 import h5py
 import numpy as np
+import onnx
 from isaacgym import gymapi
 from tqdm import tqdm
-import onnx
-
-# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
-import sys
 
 # Get absolute path relative to this script
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer'))
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
 sys.path.append(kinfer_path)
 
-from kinfer import proto as P 
+from kinfer import proto as P
 from kinfer.export.python import export_model
 
-from sim.utils.resources import load_embodiment
-
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
 
@@ -40,6 +38,7 @@
 from sim.model_export import ActorCfg, get_actor_policy
 from sim.utils.helpers import get_args  # noqa: E402
 from sim.utils.logger import Logger  # noqa: E402
+from sim.utils.resources import load_embodiment
 
 import torch  # special case with isort: skip comment
 
@@ -52,11 +51,13 @@ def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> No
     model = get_actor_jit(actor_critic)
     model.save(path)
 
+
 def get_actor_jit(actor_critic: Any) -> Any:
     model = copy.deepcopy(actor_critic.actor).to("cpu")
     traced_script_module = torch.jit.script(model)
     return traced_script_module
 
+
 def play(args: argparse.Namespace) -> None:
     logger.info("Configuring environment and training settings...")
     env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
@@ -114,42 +115,36 @@ def play(args: argparse.Namespace) -> None:
                     # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
                     # -1 comes from the fact that the latest observation is the current state and not part of the buffer
                     shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)],
-                    description="Buffer of previous observations"
-                )
+                    description="Buffer of previous observations",
+                ),
             ),
             P.ValueSchema(
                 value_name="vector_command",
                 vector_command=P.VectorCommandSchema(
                     dimensions=3,  # x_vel, y_vel, rot
-                )
+                ),
             ),
             P.ValueSchema(
                 value_name="timestamp",
-                timestamp=P.TimestampSchema(
-                    unit=P.TimestampUnit.SECONDS,
-                    description="Current policy time in seconds"
-                )
+                timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"),
             ),
             P.ValueSchema(
                 value_name="joint_positions",
                 joint_positions=P.JointPositionsSchema(
                     joint_names=JOINT_NAMES,
                     unit=P.JointPositionUnit.RADIANS,
-                )
+                ),
             ),
             P.ValueSchema(
-                value_name="joint_velocities", 
+                value_name="joint_velocities",
                 joint_velocities=P.JointVelocitiesSchema(
                     joint_names=JOINT_NAMES,
                     unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
-                )
+                ),
             ),
             P.ValueSchema(
                 value_name="previous_actions",
-                joint_positions=P.JointPositionsSchema(
-                    joint_names=JOINT_NAMES,
-                    unit=P.JointPositionUnit.RADIANS
-                )
+                joint_positions=P.JointPositionsSchema(joint_names=JOINT_NAMES, unit=P.JointPositionUnit.RADIANS),
             ),
             # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
             P.ValueSchema(
@@ -158,7 +153,7 @@ def play(args: argparse.Namespace) -> None:
                     use_accelerometer=False,
                     use_gyroscope=True,
                     use_magnetometer=False,
-                )
+                ),
             ),
             P.ValueSchema(
                 value_name="imu_orientation",
@@ -166,8 +161,8 @@ def play(args: argparse.Namespace) -> None:
                     use_accelerometer=True,
                     use_gyroscope=False,
                     use_magnetometer=False,
-                )
-            )
+                ),
+            ),
         ]
     )
 
@@ -184,10 +179,7 @@ def play(args: argparse.Namespace) -> None:
     )
 
     # Create the full model schema
-    model_schema = P.ModelSchema(
-        input_schema=input_schema,
-        output_schema=output_schema
-    )
+    model_schema = P.ModelSchema(input_schema=input_schema, output_schema=output_schema)
 
     if args.export_onnx:
         jit_policy = get_actor_jit(policy)
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index f47da5de..467113eb 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -7,6 +7,9 @@
 import argparse
 import math
 import os
+
+# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
+import sys
 from copy import deepcopy
 from dataclasses import dataclass
 from typing import Dict, List, Tuple, Union
@@ -17,15 +20,12 @@
 import onnxruntime as ort
 import pygame
 import torch
-# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
-import sys
 
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer'))
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
 sys.path.append(kinfer_path)
 
 from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
-
 from scipy.spatial.transform import Rotation as R
 from tqdm import tqdm
 
diff --git a/sim/utils/resources.py b/sim/utils/resources.py
index 84aafb80..3d353f2c 100644
--- a/sim/utils/resources.py
+++ b/sim/utils/resources.py
@@ -1,6 +1,7 @@
 import importlib
 from typing import Any
 
+
 def load_embodiment(embodiment: str) -> Any:  # noqa: ANN401
     # Dynamically import embodiment
     module_name = f"sim.resources.{embodiment}.joints"

From d36343784c5c1fae5affc8000893f222cd8fe077 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 20:35:30 -0800
Subject: [PATCH 07/43] move schema definition

---
 sim/envs/base/legged_robot_config.py |  9 ++++
 sim/envs/humanoids/gpr_config.py     | 71 ++++++++++++++++++++++++
 sim/play.py                          | 81 ++--------------------------
 3 files changed, 83 insertions(+), 78 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index e87d2252..f0854b26 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -30,9 +30,16 @@
 # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
 
 from enum import Enum
+import os
 
 from sim.envs.base.base_config import BaseConfig
 
+import sys
+
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
+sys.path.append(kinfer_path)
+from kinfer import proto
+
 
 class LeggedRobotCfg(BaseConfig):
     class env:
@@ -44,6 +51,8 @@ class env:
         send_timeouts = True  # send time out information to the algorithm
         episode_length_s = 20  # episode length in seconds
 
+        P = proto
+
     class terrain:
         mesh_type = "trimesh"  # "heightfield" # none, plane, heightfield or trimesh
         horizontal_scale = 0.1  # [m]
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index c59372d3..134c376c 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -26,6 +26,77 @@ class env(LeggedRobotCfg.env):
         episode_length_s = 24  # episode length in seconds
         use_ref_actions = False
 
+        P = LeggedRobotCfg.env.P
+
+        input_schema = P.IOSchema(
+            values=[
+                P.ValueSchema(
+                    value_name="observation_buffer",
+                        state_tensor=P.StateTensorSchema(
+                            # 11 is the number of single observation features - 6 from IMU, 5 from command input
+                            # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
+                            # -1 comes from the fact that the latest observation is the current state and not part of the buffer
+                            shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)],
+                            description="Buffer of previous observations",
+                        ),
+                    ),
+                    P.ValueSchema(
+                        value_name="vector_command",
+                        vector_command=P.VectorCommandSchema(
+                            dimensions=3,  # x_vel, y_vel, rot
+                        ),
+                    ),
+                    P.ValueSchema(
+                        value_name="timestamp",
+                        timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"),
+                    ),
+                    P.ValueSchema(
+                        value_name="joint_positions",
+                        joint_positions=P.JointPositionsSchema(
+                            joint_names=Robot.all_joints(),
+                            unit=P.JointPositionUnit.RADIANS,
+                        ),
+                    ),
+                    P.ValueSchema(
+                        value_name="joint_velocities",
+                        joint_velocities=P.JointVelocitiesSchema(
+                            joint_names=Robot.all_joints(),
+                            unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
+                        ),
+                    ),
+                    P.ValueSchema(
+                        value_name="previous_actions",
+                        joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS),
+                    ),
+                    # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
+                    P.ValueSchema(
+                        value_name="imu_angular_velocity",
+                        imu=P.ImuSchema(
+                            use_accelerometer=False,
+                            use_gyroscope=True,
+                            use_magnetometer=False,
+                        ),
+                    ),
+                    P.ValueSchema(
+                        value_name="imu_orientation",
+                        imu=P.ImuSchema(
+                            use_accelerometer=True,
+                            use_gyroscope=False,
+                            use_magnetometer=False,
+                        ),
+                    ),
+                ]
+            )
+        
+        output_schema = P.IOSchema(
+            values=[
+                P.ValueSchema(
+                    value_name="joint_positions",
+                    joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS),
+                )
+            ]
+        )
+
     class safety(LeggedRobotCfg.safety):
         # safety factors
         pos_limit = 1.0
diff --git a/sim/play.py b/sim/play.py
index d5b726bf..aa1041ab 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -94,92 +94,17 @@ def play(args: argparse.Namespace) -> None:
     ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
     policy = ppo_runner.get_inference_policy(device=env.device)
 
-    embodiment = ppo_runner.cfg["experiment_name"].lower()
-
     # Export policy if needed
     if args.export_policy:
         path = os.path.join(".")
         export_policy_as_jit(ppo_runner.alg.actor_critic, path)
         print("Exported policy as jit script to: ", path)
 
-    # load the embodiment
-    robot = load_embodiment(embodiment)
-    JOINT_NAMES = robot.Robot.all_joints()
-
-    input_schema = P.IOSchema(
-        values=[
-            P.ValueSchema(
-                value_name="observation_buffer",
-                state_tensor=P.StateTensorSchema(
-                    # 11 is the number of single observation features - 6 from IMU, 5 from command input
-                    # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
-                    # -1 comes from the fact that the latest observation is the current state and not part of the buffer
-                    shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)],
-                    description="Buffer of previous observations",
-                ),
-            ),
-            P.ValueSchema(
-                value_name="vector_command",
-                vector_command=P.VectorCommandSchema(
-                    dimensions=3,  # x_vel, y_vel, rot
-                ),
-            ),
-            P.ValueSchema(
-                value_name="timestamp",
-                timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"),
-            ),
-            P.ValueSchema(
-                value_name="joint_positions",
-                joint_positions=P.JointPositionsSchema(
-                    joint_names=JOINT_NAMES,
-                    unit=P.JointPositionUnit.RADIANS,
-                ),
-            ),
-            P.ValueSchema(
-                value_name="joint_velocities",
-                joint_velocities=P.JointVelocitiesSchema(
-                    joint_names=JOINT_NAMES,
-                    unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
-                ),
-            ),
-            P.ValueSchema(
-                value_name="previous_actions",
-                joint_positions=P.JointPositionsSchema(joint_names=JOINT_NAMES, unit=P.JointPositionUnit.RADIANS),
-            ),
-            # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
-            P.ValueSchema(
-                value_name="imu_angular_velocity",
-                imu=P.ImuSchema(
-                    use_accelerometer=False,
-                    use_gyroscope=True,
-                    use_magnetometer=False,
-                ),
-            ),
-            P.ValueSchema(
-                value_name="imu_orientation",
-                imu=P.ImuSchema(
-                    use_accelerometer=True,
-                    use_gyroscope=False,
-                    use_magnetometer=False,
-                ),
-            ),
-        ]
-    )
-
-    output_schema = P.IOSchema(
-        values=[
-            P.ValueSchema(
-                value_name="joint_positions",
-                joint_pos=P.JointPosSchema(
-                    joint_names=JOINT_NAMES,
-                    unit=P.JointPosUnit.RADIANS,
-                ),
-            )
-        ]
-    )
+    if env_cfg.env.input_schema is None or env_cfg.env.output_schema is None:
+        raise ValueError("Input or output schema is None")
 
     # Create the full model schema
-    model_schema = P.ModelSchema(input_schema=input_schema, output_schema=output_schema)
+    model_schema = P.ModelSchema(input_schema=env_cfg.env.input_schema, output_schema=env_cfg.env.output_schema)
 
     if args.export_onnx:
         jit_policy = get_actor_jit(policy)

From 9942d79f9ff4fd2f576953e5310f002daeff01c9 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Sun, 29 Dec 2024 20:36:23 -0800
Subject: [PATCH 08/43] fmt

---
 sim/envs/base/legged_robot_config.py |   5 +-
 sim/envs/humanoids/gpr_config.py     | 104 ++++++++++++++-------------
 2 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index f0854b26..44a747af 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -29,13 +29,12 @@
 #
 # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
 
-from enum import Enum
 import os
+import sys
+from enum import Enum
 
 from sim.envs.base.base_config import BaseConfig
 
-import sys
-
 kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
 sys.path.append(kinfer_path)
 from kinfer import proto
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 134c376c..cf569de3 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -32,67 +32,73 @@ class env(LeggedRobotCfg.env):
             values=[
                 P.ValueSchema(
                     value_name="observation_buffer",
-                        state_tensor=P.StateTensorSchema(
-                            # 11 is the number of single observation features - 6 from IMU, 5 from command input
-                            # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
-                            # -1 comes from the fact that the latest observation is the current state and not part of the buffer
-                            shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)],
-                            description="Buffer of previous observations",
-                        ),
+                    state_tensor=P.StateTensorSchema(
+                        # 11 is the number of single observation features - 6 from IMU, 5 from command input
+                        # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
+                        # -1 comes from the fact that the latest observation is the current state and not part of the buffer
+                        shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)],
+                        description="Buffer of previous observations",
                     ),
-                    P.ValueSchema(
-                        value_name="vector_command",
-                        vector_command=P.VectorCommandSchema(
-                            dimensions=3,  # x_vel, y_vel, rot
-                        ),
+                ),
+                P.ValueSchema(
+                    value_name="vector_command",
+                    vector_command=P.VectorCommandSchema(
+                        dimensions=3,  # x_vel, y_vel, rot
                     ),
-                    P.ValueSchema(
-                        value_name="timestamp",
-                        timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"),
+                ),
+                P.ValueSchema(
+                    value_name="timestamp",
+                    timestamp=P.TimestampSchema(
+                        unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"
                     ),
-                    P.ValueSchema(
-                        value_name="joint_positions",
-                        joint_positions=P.JointPositionsSchema(
-                            joint_names=Robot.all_joints(),
-                            unit=P.JointPositionUnit.RADIANS,
-                        ),
+                ),
+                P.ValueSchema(
+                    value_name="joint_positions",
+                    joint_positions=P.JointPositionsSchema(
+                        joint_names=Robot.all_joints(),
+                        unit=P.JointPositionUnit.RADIANS,
                     ),
-                    P.ValueSchema(
-                        value_name="joint_velocities",
-                        joint_velocities=P.JointVelocitiesSchema(
-                            joint_names=Robot.all_joints(),
-                            unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
-                        ),
+                ),
+                P.ValueSchema(
+                    value_name="joint_velocities",
+                    joint_velocities=P.JointVelocitiesSchema(
+                        joint_names=Robot.all_joints(),
+                        unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
                     ),
-                    P.ValueSchema(
-                        value_name="previous_actions",
-                        joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS),
+                ),
+                P.ValueSchema(
+                    value_name="previous_actions",
+                    joint_positions=P.JointPositionsSchema(
+                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
                     ),
-                    # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
-                    P.ValueSchema(
-                        value_name="imu_angular_velocity",
-                        imu=P.ImuSchema(
-                            use_accelerometer=False,
-                            use_gyroscope=True,
-                            use_magnetometer=False,
-                        ),
+                ),
+                # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
+                P.ValueSchema(
+                    value_name="imu_angular_velocity",
+                    imu=P.ImuSchema(
+                        use_accelerometer=False,
+                        use_gyroscope=True,
+                        use_magnetometer=False,
                     ),
-                    P.ValueSchema(
-                        value_name="imu_orientation",
-                        imu=P.ImuSchema(
-                            use_accelerometer=True,
-                            use_gyroscope=False,
-                            use_magnetometer=False,
-                        ),
+                ),
+                P.ValueSchema(
+                    value_name="imu_orientation",
+                    imu=P.ImuSchema(
+                        use_accelerometer=True,
+                        use_gyroscope=False,
+                        use_magnetometer=False,
                     ),
-                ]
-            )
-        
+                ),
+            ]
+        )
+
         output_schema = P.IOSchema(
             values=[
                 P.ValueSchema(
                     value_name="joint_positions",
-                    joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS),
+                    joint_positions=P.JointPositionsSchema(
+                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
+                    ),
                 )
             ]
         )

From 64d42c11dd89692cbc3c018d4c93d9f26da3cff3 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 12:45:06 -0800
Subject: [PATCH 09/43] fix schema

---
 sim/envs/humanoids/gpr_config.py | 42 ++++++++++++++++++++------------
 sim/model_export.py              |  6 ++---
 sim/play.py                      | 19 ++++++++++++++-
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index cf569de3..658f0dc3 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -30,16 +30,6 @@ class env(LeggedRobotCfg.env):
 
         input_schema = P.IOSchema(
             values=[
-                P.ValueSchema(
-                    value_name="observation_buffer",
-                    state_tensor=P.StateTensorSchema(
-                        # 11 is the number of single observation features - 6 from IMU, 5 from command input
-                        # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
-                        # -1 comes from the fact that the latest observation is the current state and not part of the buffer
-                        shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)],
-                        description="Buffer of previous observations",
-                    ),
-                ),
                 P.ValueSchema(
                     value_name="vector_command",
                     vector_command=P.VectorCommandSchema(
@@ -47,7 +37,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="timestamp",
+                    value_name="t",
                     timestamp=P.TimestampSchema(
                         unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"
                     ),
@@ -74,7 +64,7 @@ class env(LeggedRobotCfg.env):
                 ),
                 # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
                 P.ValueSchema(
-                    value_name="imu_angular_velocity",
+                    value_name="imu_ang_vel",
                     imu=P.ImuSchema(
                         use_accelerometer=False,
                         use_gyroscope=True,
@@ -82,24 +72,46 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="imu_orientation",
+                    value_name="imu_euler_xyz",
                     imu=P.ImuSchema(
                         use_accelerometer=True,
                         use_gyroscope=False,
                         use_magnetometer=False,
                     ),
                 ),
+                P.ValueSchema(
+                    value_name="buffer",
+                    state_tensor=P.StateTensorSchema(
+                        # 11 is the number of single observation features - 6 from IMU, 5 from command input
+                        # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
+                        shape=[frame_stack * (11 + NUM_JOINTS * 3)],
+                        description="Buffer of previous observations",
+                    ),
+                ),
             ]
         )
 
         output_schema = P.IOSchema(
             values=[
                 P.ValueSchema(
-                    value_name="joint_positions",
+                    value_name="actions",
+                    joint_positions=P.JointPositionsSchema(
+                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
+                    ),
+                ),
+                P.ValueSchema(
+                    value_name="actions_raw",
                     joint_positions=P.JointPositionsSchema(
                         joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
                     ),
-                )
+                ),
+                P.ValueSchema(
+                    value_name="buffer",
+                    state_tensor=P.StateTensorSchema(
+                        shape=[frame_stack * (11 + NUM_JOINTS * 3)],
+                        description="Updated buffer of observations",
+                    ),
+                ),
             ]
         )
 
diff --git a/sim/model_export.py b/sim/model_export.py
index 3777124a..64362b67 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -125,9 +125,7 @@ def get_init_buffer(self) -> Tensor:
 
     def forward(
         self,
-        x_vel: Tensor,  # x-coordinate of the target velocity
-        y_vel: Tensor,  # y-coordinate of the target velocity
-        rot: Tensor,  # target angular velocity
+        vector_command: Tensor,  # (x_vel, y_vel, rot)
         t: Tensor,  # current policy time (sec)
         dof_pos: Tensor,  # current angular position of the DoFs relative to default
         dof_vel: Tensor,  # current angular velocity of the DoFs
@@ -163,6 +161,8 @@ def forward(
         sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time)
         cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time)
 
+        x_vel, y_vel, rot = vector_command.split(1)
+
         # Construct command input
         command_input = torch.cat(
             (
diff --git a/sim/play.py b/sim/play.py
index aa1041ab..e905fcd6 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -107,10 +107,27 @@ def play(args: argparse.Namespace) -> None:
     model_schema = P.ModelSchema(input_schema=env_cfg.env.input_schema, output_schema=env_cfg.env.output_schema)
 
     if args.export_onnx:
-        jit_policy = get_actor_jit(policy)
+        actor_cfg = ActorCfg(
+            embodiment=ppo_runner.cfg["experiment_name"].lower(),
+            cycle_time=env_cfg.rewards.cycle_time,
+            sim_dt=env_cfg.sim.dt,
+            sim_decimation=env_cfg.control.decimation,
+            tau_factor=env_cfg.safety.torque_limit,
+            action_scale=env_cfg.control.action_scale,
+            lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
+            ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
+            quat_scale=env_cfg.normalization.obs_scales.quat,
+            dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
+            dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
+            frame_stack=env_cfg.env.frame_stack,
+            clip_observations=env_cfg.normalization.clip_observations,
+            clip_actions=env_cfg.normalization.clip_actions,
+        )
+        jit_policy, metadata, _ = get_actor_policy(ppo_runner.load_path, actor_cfg)
         kinfer_policy = export_model(
             model=jit_policy,
             schema=model_schema,
+            metadata=metadata,
         )
         onnx.save(kinfer_policy, "policy.kinfer")
     # Prepare for logging

From 3f04f24120b37bf818bc337d4357f0903d152b5c Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 14:38:01 -0800
Subject: [PATCH 10/43] minor fixes

---
 sim/envs/humanoids/gpr_config.py | 4 +---
 sim/play.py                      | 2 +-
 sim/utils/helpers.py             | 3 ++-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 658f0dc3..680f4626 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -39,7 +39,7 @@ class env(LeggedRobotCfg.env):
                 P.ValueSchema(
                     value_name="t",
                     timestamp=P.TimestampSchema(
-                        unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"
+                        start_seconds=0,
                     ),
                 ),
                 P.ValueSchema(
@@ -85,7 +85,6 @@ class env(LeggedRobotCfg.env):
                         # 11 is the number of single observation features - 6 from IMU, 5 from command input
                         # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
-                        description="Buffer of previous observations",
                     ),
                 ),
             ]
@@ -109,7 +108,6 @@ class env(LeggedRobotCfg.env):
                     value_name="buffer",
                     state_tensor=P.StateTensorSchema(
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
-                        description="Updated buffer of observations",
                     ),
                 ),
             ]
diff --git a/sim/play.py b/sim/play.py
index e905fcd6..71579476 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -29,7 +29,7 @@
 sys.path.append(kinfer_path)
 
 from kinfer import proto as P
-from kinfer.export.python import export_model
+from kinfer.export.pytorch import export_model
 
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 2724896f..e985cb72 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -47,7 +47,8 @@ def class_to_dict(obj) -> dict:
         return obj
     result = {}
     for key in dir(obj):
-        if key.startswith("_"):
+        # Skip private attributes and known problematic ones
+        if key.startswith("_") or key in ["P", "input_schema", "output_schema"]:
             continue
         element = []
         val = getattr(obj, key)

From 8f904caed6f2844a1dc40ee42ee5e775cce2b37f Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 14:39:21 -0800
Subject: [PATCH 11/43] fix names

---
 sim/envs/humanoids/gpr_config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 680f4626..9dc62731 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -43,21 +43,21 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="joint_positions",
+                    value_name="dof_pos",
                     joint_positions=P.JointPositionsSchema(
                         joint_names=Robot.all_joints(),
                         unit=P.JointPositionUnit.RADIANS,
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="joint_velocities",
+                    value_name="dof_vel",
                     joint_velocities=P.JointVelocitiesSchema(
                         joint_names=Robot.all_joints(),
                         unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="previous_actions",
+                    value_name="prev_actions",
                     joint_positions=P.JointPositionsSchema(
                         joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
                     ),

From 8f120bb94e571836191597f51dbfa991bca0fc2c Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 15:07:58 -0800
Subject: [PATCH 12/43] tensors

---
 sim/envs/humanoids/gpr_config.py | 2 ++
 sim/model_export.py              | 8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 9dc62731..24611fd3 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -85,6 +85,7 @@ class env(LeggedRobotCfg.env):
                         # 11 is the number of single observation features - 6 from IMU, 5 from command input
                         # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
+                        dtype=P.DType.FP32,
                     ),
                 ),
             ]
@@ -108,6 +109,7 @@ class env(LeggedRobotCfg.env):
                     value_name="buffer",
                     state_tensor=P.StateTensorSchema(
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
+                        dtype=P.DType.FP32,
                     ),
                 ),
             ]
diff --git a/sim/model_export.py b/sim/model_export.py
index 64362b67..bc53a602 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -3,7 +3,7 @@
 import re
 from dataclasses import dataclass, fields
 from io import BytesIO
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 import onnx
 import onnxruntime as ort
@@ -133,7 +133,7 @@ def forward(
         imu_ang_vel: Tensor,  # angular velocity of the IMU
         imu_euler_xyz: Tensor,  # euler angles of the IMU
         buffer: Tensor,  # buffer of previous observations
-    ) -> Tuple[Tensor, Tensor, Tensor]:
+    ) -> Dict[str, Tensor]:
         """Runs the actor model forward pass.
 
         Args:
@@ -206,7 +206,9 @@ def forward(
         actions = self.policy(policy_input).squeeze(0)
         actions_scaled = actions * self.action_scale
 
-        return actions_scaled, actions, x
+        return {"actions": actions_scaled,
+                "actions_raw": actions,
+                "buffer": x}
 
 
 def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]:

From 31dd3790f36ecc66f1adc71173047ece3fc4c780 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 15:09:22 -0800
Subject: [PATCH 13/43] squeeze

---
 sim/model_export.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sim/model_export.py b/sim/model_export.py
index bc53a602..2c645c63 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -186,8 +186,8 @@ def forward(
                 q,
                 dq,
                 prev_actions,
-                imu_ang_vel * self.ang_vel_scale,
-                imu_euler_xyz * self.quat_scale,
+                imu_ang_vel.squeeze(0) * self.ang_vel_scale,
+                imu_euler_xyz.squeeze(0) * self.quat_scale,
             ),
             dim=0,
         )

From 7a0a1c394866d28fe756c062de76c7fb23595803 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 15:17:53 -0800
Subject: [PATCH 14/43] fix imports

---
 sim/envs/base/legged_robot_config.py | 6 +-----
 sim/envs/humanoids/gpr_config.py     | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index 44a747af..e427c99f 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -35,10 +35,8 @@
 
 from sim.envs.base.base_config import BaseConfig
 
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
+kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer"))
 sys.path.append(kinfer_path)
-from kinfer import proto
-
 
 class LeggedRobotCfg(BaseConfig):
     class env:
@@ -50,8 +48,6 @@ class env:
         send_timeouts = True  # send time out information to the algorithm
         episode_length_s = 20  # episode length in seconds
 
-        P = proto
-
     class terrain:
         mesh_type = "trimesh"  # "heightfield" # none, plane, heightfield or trimesh
         horizontal_scale = 0.1  # [m]
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 24611fd3..78637d03 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -26,7 +26,7 @@ class env(LeggedRobotCfg.env):
         episode_length_s = 24  # episode length in seconds
         use_ref_actions = False
 
-        P = LeggedRobotCfg.env.P
+        from kinfer import proto as P
 
         input_schema = P.IOSchema(
             values=[

From a289ee8c0eaeeec412dbc142fb2abe3b90b16276 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 15:25:50 -0800
Subject: [PATCH 15/43] pin 3.8 compatible kinfer

---
 third_party/kinfer | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/kinfer b/third_party/kinfer
index acfa5f67..13e24b7b 160000
--- a/third_party/kinfer
+++ b/third_party/kinfer
@@ -1 +1 @@
-Subproject commit acfa5f67aa26fedfc77c724358d3e4b35df7c365
+Subproject commit 13e24b7bc5104dc6be8a6e49c95912ab7c80aaea

From 26466b809612dee7486b283484028b97dbe5f66d Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 15:28:52 -0800
Subject: [PATCH 16/43] lint

---
 sim/envs/base/legged_robot_config.py | 1 +
 sim/model_export.py                  | 8 ++------
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index e427c99f..5e8b7a7c 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -38,6 +38,7 @@
 kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer"))
 sys.path.append(kinfer_path)
 
+
 class LeggedRobotCfg(BaseConfig):
     class env:
         num_envs = 4096
diff --git a/sim/model_export.py b/sim/model_export.py
index 2c645c63..a7d384c0 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -137,9 +137,7 @@ def forward(
         """Runs the actor model forward pass.
 
         Args:
-            x_vel: The x-coordinate of the target velocity, with shape (1).
-            y_vel: The y-coordinate of the target velocity, with shape (1).
-            rot: The target angular velocity, with shape (1).
+            vector_command: The target velocity vector, with shape (3). It consistes of x_vel, y_vel, and rot.
             t: The current policy time step, with shape (1).
             dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions).
             dof_vel: The current angular velocity of the DoFs, with shape (num_actions).
@@ -206,9 +204,7 @@ def forward(
         actions = self.policy(policy_input).squeeze(0)
         actions_scaled = actions * self.action_scale
 
-        return {"actions": actions_scaled,
-                "actions_raw": actions,
-                "buffer": x}
+        return {"actions": actions_scaled, "actions_raw": actions, "buffer": x}
 
 
 def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]:

From d041f50a0a2c3d32d62b691711674398a012768d Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 21:16:09 -0800
Subject: [PATCH 17/43] joints

---
 sim/resources/gpr/joints.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py
index 3c4dd5d5..f3e937ee 100755
--- a/sim/resources/gpr/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -155,6 +155,16 @@ def stiffness(cls) -> Dict[str, float]:
             "ankle_y": 40,
         }
 
+    @classmethod
+    def stiffness_mapping(cls) -> Dict[str, float]:
+        mapping = {}
+        stiffness = cls.stiffness()
+        breakpoint()
+        for side in ["L", "R"]:
+            for joint, value in stiffness.items():
+                mapping[f"{side}_{joint}"] = value
+        return mapping
+
     # d_gains
     @classmethod
     def damping(cls) -> Dict[str, float]:

From 4a608b03325a9fcf588f0ccb9201b2188f93302a Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 21:59:45 -0800
Subject: [PATCH 18/43] update local kinfer

---
 third_party/kinfer | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/kinfer b/third_party/kinfer
index 13e24b7b..e05bfab1 160000
--- a/third_party/kinfer
+++ b/third_party/kinfer
@@ -1 +1 @@
-Subproject commit 13e24b7bc5104dc6be8a6e49c95912ab7c80aaea
+Subproject commit e05bfab1cde94e2ff607b3dff7a5604ef0edcd39

From 07a9b242ee14e6fcdb1a9c8ed675597c39587abf Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Mon, 30 Dec 2024 22:02:29 -0800
Subject: [PATCH 19/43] update metadata + path

---
 sim/envs/base/legged_robot_config.py |  4 ---
 sim/model_export.py                  | 39 +++++++++++++++++++---------
 sim/play.py                          |  4 ---
 sim/resources/gpr/joints.py          | 19 +++++++++++++-
 sim/sim2sim.py                       |  3 ---
 5 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index 5e8b7a7c..265ff995 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -35,10 +35,6 @@
 
 from sim.envs.base.base_config import BaseConfig
 
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer"))
-sys.path.append(kinfer_path)
-
-
 class LeggedRobotCfg(BaseConfig):
     class env:
         num_envs = 4096
diff --git a/sim/model_export.py b/sim/model_export.py
index a7d384c0..26d2a4d7 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -237,24 +237,39 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
     input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer)
 
     # Add sim2sim metadata
-    robot_effort = list(a_model.robot.effort().values())
-    robot_stiffness = list(a_model.robot.stiffness().values())
-    robot_damping = list(a_model.robot.damping().values())
+    robot = a_model.robot
+    robot_effort = robot.effort_mapping()
+    robot_stiffness = robot.stiffness_mapping()
+    robot_damping = robot.damping_mapping()
     num_actions = a_model.num_actions
     num_observations = a_model.num_observations
 
-    default_standing = list(a_model.robot.default_standing().values())
+    default_standing = robot.default_standing()
+
+    metadata = {
+        "num_actions": num_actions,
+        "num_observations": num_observations,
+        "robot_effort": robot_effort,
+        "robot_stiffness": robot_stiffness,
+        "robot_damping": robot_damping,
+        "default_standing": default_standing,
+        "sim_dt": cfg.sim_dt,
+        "sim_decimation": cfg.sim_decimation,
+        "tau_factor": cfg.tau_factor,
+        "action_scale": cfg.action_scale,
+        "lin_vel_scale": cfg.lin_vel_scale,
+        "ang_vel_scale": cfg.ang_vel_scale,
+        "quat_scale": cfg.quat_scale,
+        "dof_pos_scale": cfg.dof_pos_scale,
+        "dof_vel_scale": cfg.dof_vel_scale,
+        "frame_stack": cfg.frame_stack,
+        "clip_observations": cfg.clip_observations,
+        "clip_actions": cfg.clip_actions,
+    }
 
     return (
         a_model,
-        {
-            "robot_effort": robot_effort,
-            "robot_stiffness": robot_stiffness,
-            "robot_damping": robot_damping,
-            "num_actions": num_actions,
-            "num_observations": num_observations,
-            "default_standing": default_standing,
-        },
+        metadata,
         input_tensors,
     )
 
diff --git a/sim/play.py b/sim/play.py
index 71579476..009019fc 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -24,10 +24,6 @@
 from isaacgym import gymapi
 from tqdm import tqdm
 
-# Get absolute path relative to this script
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
-sys.path.append(kinfer_path)
-
 from kinfer import proto as P
 from kinfer.export.pytorch import export_model
 
diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py
index f3e937ee..971f2733 100755
--- a/sim/resources/gpr/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -159,7 +159,6 @@ def stiffness(cls) -> Dict[str, float]:
     def stiffness_mapping(cls) -> Dict[str, float]:
         mapping = {}
         stiffness = cls.stiffness()
-        breakpoint()
         for side in ["L", "R"]:
             for joint, value in stiffness.items():
                 mapping[f"{side}_{joint}"] = value
@@ -176,6 +175,15 @@ def damping(cls) -> Dict[str, float]:
             "ankle_y": 5,
         }
 
+    @classmethod
+    def damping_mapping(cls) -> Dict[str, float]:
+        mapping = {}
+        damping = cls.damping()
+        for side in ["L", "R"]:
+            for joint, value in damping.items():
+                mapping[f"{side}_{joint}"] = value
+        return mapping
+
     # effort_limits
     @classmethod
     def effort(cls) -> Dict[str, float]:
@@ -187,6 +195,15 @@ def effort(cls) -> Dict[str, float]:
             "ankle_y": 17,
         }
 
+    @classmethod
+    def effort_mapping(cls) -> Dict[str, float]:
+        mapping = {}
+        effort = cls.effort()
+        for side in ["L", "R"]:
+            for joint, value in effort.items():
+                mapping[f"{side}_{joint}"] = value
+        return mapping
+
     # vel_limits
     @classmethod
     def velocity(cls) -> Dict[str, float]:
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 467113eb..a5dad736 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -21,9 +21,6 @@
 import pygame
 import torch
 
-kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer"))
-sys.path.append(kinfer_path)
-
 from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
 from scipy.spatial.transform import Rotation as R

From 262198d406bb90ecd9731a126c980391e4550007 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Tue, 31 Dec 2024 01:41:29 -0800
Subject: [PATCH 20/43] clean and rename

---
 sim/envs/humanoids/gpr_config.py | 4 ++--
 sim/model_export.py              | 6 +++---
 sim/play.py                      | 4 ----
 sim/sim2sim.py                   | 2 --
 4 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 78637d03..a75b0ec1 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -80,7 +80,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="buffer",
+                    value_name="state_tensor",
                     state_tensor=P.StateTensorSchema(
                         # 11 is the number of single observation features - 6 from IMU, 5 from command input
                         # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
@@ -106,7 +106,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="buffer",
+                    value_name="state_tensor",
                     state_tensor=P.StateTensorSchema(
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
                         dtype=P.DType.FP32,
diff --git a/sim/model_export.py b/sim/model_export.py
index 26d2a4d7..60a4ceaa 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -132,7 +132,7 @@ def forward(
         prev_actions: Tensor,  # previous actions taken by the model
         imu_ang_vel: Tensor,  # angular velocity of the IMU
         imu_euler_xyz: Tensor,  # euler angles of the IMU
-        buffer: Tensor,  # buffer of previous observations
+        state_tensor: Tensor,  # buffer of previous observations
     ) -> Dict[str, Tensor]:
         """Runs the actor model forward pass.
 
@@ -194,7 +194,7 @@ def forward(
         new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations)
 
         # Add the new frame to the buffer
-        x = torch.cat((buffer, new_x), dim=0)
+        x = torch.cat((state_tensor, new_x), dim=0)
         # Pop the oldest frame
         x = x[self.num_single_obs :]
 
@@ -204,7 +204,7 @@ def forward(
         actions = self.policy(policy_input).squeeze(0)
         actions_scaled = actions * self.action_scale
 
-        return {"actions": actions_scaled, "actions_raw": actions, "buffer": x}
+        return {"actions": actions_scaled, "actions_raw": actions, "state_tensor": x}
 
 
 def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]:
diff --git a/sim/play.py b/sim/play.py
index 009019fc..955d094a 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -10,10 +10,6 @@
 import math
 import os
 
-# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
-import sys
-import time
-import uuid
 from datetime import datetime
 from typing import Any, Union
 
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index a5dad736..b21507cc 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -8,8 +8,6 @@
 import math
 import os
 
-# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8
-import sys
 from copy import deepcopy
 from dataclasses import dataclass
 from typing import Dict, List, Tuple, Union

From 0c583dbd19988f07e5c32230899eff2694c0bc58 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Tue, 31 Dec 2024 01:43:25 -0800
Subject: [PATCH 21/43] lint

---
 sim/envs/base/legged_robot_config.py | 1 +
 sim/model_export.py                  | 2 +-
 sim/play.py                          | 4 +---
 sim/sim2sim.py                       | 2 --
 4 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index 265ff995..71428b71 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -35,6 +35,7 @@
 
 from sim.envs.base.base_config import BaseConfig
 
+
 class LeggedRobotCfg(BaseConfig):
     class env:
         num_envs = 4096
diff --git a/sim/model_export.py b/sim/model_export.py
index 60a4ceaa..d7a93abf 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -147,7 +147,7 @@ def forward(
             imu_euler_xyz: The euler angles of the IMU, with shape (3),
                 in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used,
                 can be all zeros.
-            buffer: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is
+            state_tensor: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is
                 the return value of the previous forward pass. On the first
                 pass, it should be all zeros.
 
diff --git a/sim/play.py b/sim/play.py
index 955d094a..1726cf76 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -9,7 +9,6 @@
 import logging
 import math
 import os
-
 from datetime import datetime
 from typing import Any, Union
 
@@ -18,10 +17,9 @@
 import numpy as np
 import onnx
 from isaacgym import gymapi
-from tqdm import tqdm
-
 from kinfer import proto as P
 from kinfer.export.pytorch import export_model
+from tqdm import tqdm
 
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index b21507cc..82ce1137 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -7,7 +7,6 @@
 import argparse
 import math
 import os
-
 from copy import deepcopy
 from dataclasses import dataclass
 from typing import Dict, List, Tuple, Union
@@ -18,7 +17,6 @@
 import onnxruntime as ort
 import pygame
 import torch
-
 from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
 from scipy.spatial.transform import Rotation as R

From ce4d7291cb81a029d5107f9f40b01da131b98310 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Tue, 31 Dec 2024 12:55:29 -0800
Subject: [PATCH 22/43] rename

---
 sim/envs/humanoids/gpr_config.py | 2 +-
 sim/model_export.py              | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index a75b0ec1..c867139f 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -37,7 +37,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="t",
+                    value_name="timestamp",
                     timestamp=P.TimestampSchema(
                         start_seconds=0,
                     ),
diff --git a/sim/model_export.py b/sim/model_export.py
index d7a93abf..b8a1bbc0 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -126,7 +126,7 @@ def get_init_buffer(self) -> Tensor:
     def forward(
         self,
         vector_command: Tensor,  # (x_vel, y_vel, rot)
-        t: Tensor,  # current policy time (sec)
+        timestamp: Tensor,  # current policy time (sec)
         dof_pos: Tensor,  # current angular position of the DoFs relative to default
         dof_vel: Tensor,  # current angular velocity of the DoFs
         prev_actions: Tensor,  # previous actions taken by the model
@@ -138,7 +138,7 @@ def forward(
 
         Args:
             vector_command: The target velocity vector, with shape (3). It consistes of x_vel, y_vel, and rot.
-            t: The current policy time step, with shape (1).
+            timestamp: The current policy time step, with shape (1).
             dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions).
             dof_vel: The current angular velocity of the DoFs, with shape (num_actions).
             prev_actions: The previous actions taken by the model, with shape (num_actions).
@@ -156,8 +156,8 @@ def forward(
             actions: The actions to take, with shape (num_actions).
             x: The new buffer of observations, with shape (frame_stack * num_single_obs).
         """
-        sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time)
-        cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time)
+        sin_pos = torch.sin(2 * torch.pi * timestamp / self.cycle_time)
+        cos_pos = torch.cos(2 * torch.pi * timestamp / self.cycle_time)
 
         x_vel, y_vel, rot = vector_command.split(1)
 

From 9b0738ab2da28b0ba8726bafa38e8f83ea6b9853 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 14:06:39 -0800
Subject: [PATCH 23/43] push kinfer

---
 third_party/kinfer | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/kinfer b/third_party/kinfer
index e05bfab1..83cff064 160000
--- a/third_party/kinfer
+++ b/third_party/kinfer
@@ -1 +1 @@
-Subproject commit e05bfab1cde94e2ff607b3dff7a5604ef0edcd39
+Subproject commit 83cff06425acc31746a3f23ae219a353c6d49306

From aff40731eb7fac86514cf33b7b3fd71b1c169a10 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 14:24:39 -0800
Subject: [PATCH 24/43] new names

---
 sim/envs/humanoids/gpr_config.py | 4 ++--
 sim/model_export.py              | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index c867139f..f6f5b645 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -80,7 +80,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="state_tensor",
+                    value_name="hist_obs",
                     state_tensor=P.StateTensorSchema(
                         # 11 is the number of single observation features - 6 from IMU, 5 from command input
                         # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
@@ -106,7 +106,7 @@ class env(LeggedRobotCfg.env):
                     ),
                 ),
                 P.ValueSchema(
-                    value_name="state_tensor",
+                    value_name="new_x",
                     state_tensor=P.StateTensorSchema(
                         shape=[frame_stack * (11 + NUM_JOINTS * 3)],
                         dtype=P.DType.FP32,
diff --git a/sim/model_export.py b/sim/model_export.py
index b8a1bbc0..b57003cf 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -132,7 +132,7 @@ def forward(
         prev_actions: Tensor,  # previous actions taken by the model
         imu_ang_vel: Tensor,  # angular velocity of the IMU
         imu_euler_xyz: Tensor,  # euler angles of the IMU
-        state_tensor: Tensor,  # buffer of previous observations
+        hist_obs: Tensor,  # buffer of previous observations
     ) -> Dict[str, Tensor]:
         """Runs the actor model forward pass.
 
@@ -194,7 +194,7 @@ def forward(
         new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations)
 
         # Add the new frame to the buffer
-        x = torch.cat((state_tensor, new_x), dim=0)
+        x = torch.cat((hist_obs, new_x), dim=0)
         # Pop the oldest frame
         x = x[self.num_single_obs :]
 
@@ -204,7 +204,7 @@ def forward(
         actions = self.policy(policy_input).squeeze(0)
         actions_scaled = actions * self.action_scale
 
-        return {"actions": actions_scaled, "actions_raw": actions, "state_tensor": x}
+        return {"actions": actions_scaled, "actions_raw": actions, "new_x": x}
 
 
 def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]:
@@ -265,6 +265,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
         "frame_stack": cfg.frame_stack,
         "clip_observations": cfg.clip_observations,
         "clip_actions": cfg.clip_actions,
+        "joint_names": robot.all_joints(),
     }
 
     return (

From ac6a76c695aa56f65eeeffa4bdaefe946249056c Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 14:31:11 -0800
Subject: [PATCH 25/43] policy runs

---
 sim/sim2sim.py     | 196 +++++++++++++++++++++++++++++++++++----------
 third_party/kinfer |   2 +-
 2 files changed, 153 insertions(+), 45 deletions(-)

diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 82ce1137..946b9f2e 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -17,8 +17,9 @@
 import onnxruntime as ort
 import pygame
 import torch
-from kinfer.export.pytorch import export_to_onnx
 from kinfer.inference.python import ONNXModel
+from kinfer.serialize.numpy import NumpyMultiSerializer
+from kinfer import proto as P
 from scipy.spatial.transform import Rotation as R
 from tqdm import tqdm
 
@@ -92,9 +93,10 @@ def pd_control(
     return kp * (target_q + default - q) - kd * dq
 
 
+
 def run_mujoco(
     embodiment: str,
-    policy: ort.InferenceSession,
+    policy: ONNXModel,
     model_info: Dict[str, Union[float, List[float], str]],
     keyboard_use: bool = False,
     log_h5: bool = False,
@@ -121,10 +123,16 @@ def run_mujoco(
     assert isinstance(model_info["robot_effort"], list)
     assert isinstance(model_info["robot_stiffness"], list)
     assert isinstance(model_info["robot_damping"], list)
+    assert isinstance(model_info["joint_names"], list)
+    
+
+    tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"]
+    kps = np.array(list(model_info["robot_stiffness"]))
+    kds = np.array(list(model_info["robot_damping"]))
+
+    joint_names = model_info["joint_names"]
 
-    tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"]
-    kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
-    kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
+    kinfer_serializer = NumpyMultiSerializer(policy.output_schema)
 
     try:
         data.qpos = model.keyframe("default").qpos
@@ -147,23 +155,10 @@ def run_mujoco(
 
     target_q = np.zeros((model_info["num_actions"]), dtype=np.double)
     prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double)
-    hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double)
+    hist_obs = np.zeros((model_info["num_observations"]), dtype=np.float32)
 
     count_lowlevel = 0
 
-    input_data = {
-        "x_vel.1": np.zeros(1).astype(np.float32),
-        "y_vel.1": np.zeros(1).astype(np.float32),
-        "rot.1": np.zeros(1).astype(np.float32),
-        "t.1": np.zeros(1).astype(np.float32),
-        "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
-        "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
-        "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
-        "imu_ang_vel.1": np.zeros(3).astype(np.float32),
-        "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
-        "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
-    }
-
     if log_h5:
         from sim.h5_logger import HDF5Logger
 
@@ -211,26 +206,133 @@ def run_mujoco(
             cur_pos_obs = q - default
             cur_vel_obs = dq
 
-            input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
-            input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
-            input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
 
-            input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32)
+            # Form input
+            dof_pos = P.Value(
+                value_name="dof_pos",
+                joint_positions=P.JointPositionsValue(
+                    values=[
+                        P.JointPositionValue(
+                            joint_name=name,
+                            value=q[index],
+                            unit=P.JointPositionUnit.RADIANS,
+                        )
+                        for name, index in zip(joint_names, range(len(q)))
+                    ]
+                )
+            )
+
+            dof_vel = P.Value(
+                value_name="dof_vel",
+                joint_velocities=P.JointVelocitiesValue(
+                    values=[
+                        P.JointVelocityValue(
+                            joint_name=name,
+                            value=dq[index],
+                            unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
+                        )
+                        for name, index in zip(joint_names, range(len(dq)))
+                    ]
+                )
+            )
 
-            input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
-            input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
+            vector_command = P.Value(
+                value_name="vector_command",
+                vector_command=P.VectorCommandValue(
+                    values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd],
+                )
+            )
+            seconds = int(count_lowlevel * model_info["sim_dt"])
+            nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9)
+            timestamp = P.Value(
+                value_name="timestamp",
+                timestamp=P.TimestampValue(
+                    seconds=seconds,
+                    nanos=nanoseconds,
+                )
+            )
+
+            prev_actions_value = P.Value(
+                value_name="prev_actions",
+                joint_positions=P.JointPositionsValue(
+                    values=[
+                        P.JointPositionValue(
+                            joint_name=name,
+                            value=prev_actions[index],
+                            unit=P.JointPositionUnit.RADIANS,
+                        )
+                        for name, index in zip(joint_names, range(len(prev_actions)))
+                    ]
+                )
+            )
+
+            imu_ang_vel = P.Value(
+                value_name="imu_ang_vel",
+                imu=P.ImuValue(
+                    angular_velocity=P.ImuGyroscopeValue(
+                        x=omega[0],
+                        y=omega[1],
+                        z=omega[2],
+                    )
+                )
+            )
+
+            imu_euler_xyz = P.Value(
+                value_name="imu_euler_xyz",
+                imu=P.ImuValue(
+                    linear_acceleration=P.ImuAccelerometerValue(
+                        x=eu_ang[0],
+                        y=eu_ang[1],
+                        z=eu_ang[2],
+                    ),
+                )
+            )
 
-            input_data["prev_actions.1"] = prev_actions.astype(np.float32)
+            state_tensor = P.Value(
+                value_name="hist_obs",
+                state_tensor=P.StateTensorValue(
+                    data=hist_obs.tobytes(),
+                )
+            )
+
+            policy_input = P.IO(
+                values=[
+                    vector_command,
+                    timestamp,
+                    dof_pos,
+                    dof_vel,
+                    prev_actions_value,
+                    imu_ang_vel,
+                    imu_euler_xyz,
+                    state_tensor,
+                ]
+            )
+
+            # input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
+            # input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
+            # input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
+
+            # input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32)
+
+            # input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
+            # input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
 
-            input_data["imu_ang_vel.1"] = omega.astype(np.float32)
-            input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
+            # input_data["prev_actions.1"] = prev_actions.astype(np.float32)
 
-            input_data["buffer.1"] = hist_obs.astype(np.float32)
+            # input_data["imu_ang_vel.1"] = omega.astype(np.float32)
+            # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
 
-            policy_output = policy(input_data)
-            positions = policy_output["actions_scaled"]
-            curr_actions = policy_output["actions"]
-            hist_obs = policy_output["x.3"]
+            # input_data["buffer.1"] = hist_obs.astype(np.float32)
+
+            # policy_output = policy(input_data)
+
+            policy_output = policy(policy_input)
+
+            outputs = kinfer_serializer.serialize_io(policy_output, as_dict=True)
+
+            positions = outputs["actions"]
+            curr_actions = outputs["actions_raw"]
+            hist_obs = outputs["new_x"]
 
             target_q = positions
 
@@ -305,18 +407,24 @@ def run_mujoco(
         x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0
 
     policy = ONNXModel(args.load_model)
-    metadata = policy.get_metadata()
-    model_info = {
-        "num_actions": metadata["num_actions"],
-        "num_observations": metadata["num_observations"],
-        "robot_effort": metadata["robot_effort"],
-        "robot_stiffness": metadata["robot_stiffness"],
-        "robot_damping": metadata["robot_damping"],
-        "sim_dt": metadata["sim_dt"],
-        "sim_decimation": metadata["sim_decimation"],
-        "tau_factor": metadata["tau_factor"],
-    }
+    metadata = policy.attached_metadata
 
+    try:
+        model_info = {
+            "num_actions": metadata["num_actions"],
+            "num_observations": metadata["num_observations"],
+            "robot_effort": [metadata["robot_effort"][joint] for joint in metadata["joint_names"]],
+            "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in metadata["joint_names"]],
+            "robot_damping": [metadata["robot_damping"][joint] for joint in metadata["joint_names"]],
+            "sim_dt": metadata["sim_dt"],
+            "sim_decimation": metadata["sim_decimation"],
+            "tau_factor": metadata["tau_factor"],
+            "joint_names": metadata["joint_names"],
+        }
+    except Exception as e:
+        print(f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}")
+        raise e
+    
     run_mujoco(
         embodiment=args.embodiment,
         policy=policy,
diff --git a/third_party/kinfer b/third_party/kinfer
index 83cff064..e7b2dcd2 160000
--- a/third_party/kinfer
+++ b/third_party/kinfer
@@ -1 +1 @@
-Subproject commit 83cff06425acc31746a3f23ae219a353c6d49306
+Subproject commit e7b2dcd2b7728d853f778e23585b2f3eb26f6658

From 30791e6d5558f8960e7726d0d1674ed264500693 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 14:44:58 -0800
Subject: [PATCH 26/43] joint name ordering

---
 sim/envs/humanoids/gpr_config.py | 10 +++++-----
 sim/model_export.py              |  2 +-
 sim/resources/gpr/joints.py      |  6 ++++++
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index f6f5b645..0bb526e4 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -45,21 +45,21 @@ class env(LeggedRobotCfg.env):
                 P.ValueSchema(
                     value_name="dof_pos",
                     joint_positions=P.JointPositionsSchema(
-                        joint_names=Robot.all_joints(),
+                        joint_names=Robot.joint_names(),
                         unit=P.JointPositionUnit.RADIANS,
                     ),
                 ),
                 P.ValueSchema(
                     value_name="dof_vel",
                     joint_velocities=P.JointVelocitiesSchema(
-                        joint_names=Robot.all_joints(),
+                        joint_names=Robot.joint_names(),
                         unit=P.JointVelocityUnit.RADIANS_PER_SECOND,
                     ),
                 ),
                 P.ValueSchema(
                     value_name="prev_actions",
                     joint_positions=P.JointPositionsSchema(
-                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
+                        joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
                     ),
                 ),
                 # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data
@@ -96,13 +96,13 @@ class env(LeggedRobotCfg.env):
                 P.ValueSchema(
                     value_name="actions",
                     joint_positions=P.JointPositionsSchema(
-                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
+                        joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
                     ),
                 ),
                 P.ValueSchema(
                     value_name="actions_raw",
                     joint_positions=P.JointPositionsSchema(
-                        joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS
+                        joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS
                     ),
                 ),
                 P.ValueSchema(
diff --git a/sim/model_export.py b/sim/model_export.py
index b57003cf..c35d7422 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -265,7 +265,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
         "frame_stack": cfg.frame_stack,
         "clip_observations": cfg.clip_observations,
         "clip_actions": cfg.clip_actions,
-        "joint_names": robot.all_joints(),
+        "joint_names": robot.joint_names(),
     }
 
     return (
diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py
index 971f2733..31e1f12e 100755
--- a/sim/resources/gpr/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -137,6 +137,12 @@ def default_standing(cls) -> Dict[str, float]:
             Robot.legs.right.ankle_pitch: 0.195,
         }
 
+    # CONTRACT - this should be ordered according to how the policy is trained.
+    # E.g. the first entry should be the name of the first joint in the policy.
+    @classmethod
+    def joint_names(cls) -> List[str]:
+        return list(cls.default_standing().keys())
+
     @classmethod
     def default_limits(cls) -> Dict[str, Dict[str, float]]:
         return {

From 5283db11d29aae9447e8cffcd33594f78825e099 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 14:47:42 -0800
Subject: [PATCH 27/43] stands

---
 sim/sim2sim.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 946b9f2e..f0356f4d 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -127,8 +127,8 @@ def run_mujoco(
     
 
     tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"]
-    kps = np.array(list(model_info["robot_stiffness"]))
-    kds = np.array(list(model_info["robot_damping"]))
+    kps = np.array(model_info["robot_stiffness"])
+    kds = np.array(model_info["robot_damping"])
 
     joint_names = model_info["joint_names"]
 
@@ -206,6 +206,7 @@ def run_mujoco(
             cur_pos_obs = q - default
             cur_vel_obs = dq
 
+            mm = q
 
             # Form input
             dof_pos = P.Value(

From 01fdc369a1dda51917ca639349cf5f3c3c687ebe Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 15:00:31 -0800
Subject: [PATCH 28/43] clean

---
 sim/sim2sim.py | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index f0356f4d..b938d9fb 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -124,6 +124,9 @@ def run_mujoco(
     assert isinstance(model_info["robot_stiffness"], list)
     assert isinstance(model_info["robot_damping"], list)
     assert isinstance(model_info["joint_names"], list)
+    assert isinstance(model_info["sim_dt"], float)
+    assert isinstance(model_info["cycle_time"], float)
+    assert isinstance(model_info["sim_decimation"], int)
     
 
     tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"]
@@ -197,7 +200,7 @@ def run_mujoco(
 
         # Calculate speed and accumulate for average speed calculation
         speed = np.linalg.norm(v[:2])  # Speed in the x-y plane
-        total_speed += speed
+        total_speed += float(speed)
         step_count += 1
 
         # 1000hz -> 50hz
@@ -309,24 +312,6 @@ def run_mujoco(
                 ]
             )
 
-            # input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
-            # input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
-            # input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
-
-            # input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32)
-
-            # input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
-            # input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
-
-            # input_data["prev_actions.1"] = prev_actions.astype(np.float32)
-
-            # input_data["imu_ang_vel.1"] = omega.astype(np.float32)
-            # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
-
-            # input_data["buffer.1"] = hist_obs.astype(np.float32)
-
-            # policy_output = policy(input_data)
-
             policy_output = policy(policy_input)
 
             outputs = kinfer_serializer.serialize_io(policy_output, as_dict=True)

From f02bd84ae38279c4eb88a8423622d43086ca244a Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 15:01:09 -0800
Subject: [PATCH 29/43] lint

---
 sim/model_export.py |  4 ++--
 sim/sim2sim.py      | 26 +++++++++++++-------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/sim/model_export.py b/sim/model_export.py
index c35d7422..68203b9d 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -147,14 +147,14 @@ def forward(
             imu_euler_xyz: The euler angles of the IMU, with shape (3),
                 in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used,
                 can be all zeros.
-            state_tensor: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is
+            hist_obs: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is
                 the return value of the previous forward pass. On the first
                 pass, it should be all zeros.
 
         Returns:
             actions_scaled: The actions to take, with shape (num_actions), scaled by the action_scale.
             actions: The actions to take, with shape (num_actions).
-            x: The new buffer of observations, with shape (frame_stack * num_single_obs).
+            new_x: The new buffer of observations, with shape (frame_stack * num_single_obs).
         """
         sin_pos = torch.sin(2 * torch.pi * timestamp / self.cycle_time)
         cos_pos = torch.cos(2 * torch.pi * timestamp / self.cycle_time)
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index b938d9fb..b7688e28 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -17,9 +17,9 @@
 import onnxruntime as ort
 import pygame
 import torch
+from kinfer import proto as P
 from kinfer.inference.python import ONNXModel
 from kinfer.serialize.numpy import NumpyMultiSerializer
-from kinfer import proto as P
 from scipy.spatial.transform import Rotation as R
 from tqdm import tqdm
 
@@ -93,7 +93,6 @@ def pd_control(
     return kp * (target_q + default - q) - kd * dq
 
 
-
 def run_mujoco(
     embodiment: str,
     policy: ONNXModel,
@@ -127,7 +126,6 @@ def run_mujoco(
     assert isinstance(model_info["sim_dt"], float)
     assert isinstance(model_info["cycle_time"], float)
     assert isinstance(model_info["sim_decimation"], int)
-    
 
     tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"]
     kps = np.array(model_info["robot_stiffness"])
@@ -223,7 +221,7 @@ def run_mujoco(
                         )
                         for name, index in zip(joint_names, range(len(q)))
                     ]
-                )
+                ),
             )
 
             dof_vel = P.Value(
@@ -237,14 +235,14 @@ def run_mujoco(
                         )
                         for name, index in zip(joint_names, range(len(dq)))
                     ]
-                )
+                ),
             )
 
             vector_command = P.Value(
                 value_name="vector_command",
                 vector_command=P.VectorCommandValue(
                     values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd],
-                )
+                ),
             )
             seconds = int(count_lowlevel * model_info["sim_dt"])
             nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9)
@@ -253,7 +251,7 @@ def run_mujoco(
                 timestamp=P.TimestampValue(
                     seconds=seconds,
                     nanos=nanoseconds,
-                )
+                ),
             )
 
             prev_actions_value = P.Value(
@@ -267,7 +265,7 @@ def run_mujoco(
                         )
                         for name, index in zip(joint_names, range(len(prev_actions)))
                     ]
-                )
+                ),
             )
 
             imu_ang_vel = P.Value(
@@ -278,7 +276,7 @@ def run_mujoco(
                         y=omega[1],
                         z=omega[2],
                     )
-                )
+                ),
             )
 
             imu_euler_xyz = P.Value(
@@ -289,14 +287,14 @@ def run_mujoco(
                         y=eu_ang[1],
                         z=eu_ang[2],
                     ),
-                )
+                ),
             )
 
             state_tensor = P.Value(
                 value_name="hist_obs",
                 state_tensor=P.StateTensorValue(
                     data=hist_obs.tobytes(),
-                )
+                ),
             )
 
             policy_input = P.IO(
@@ -408,9 +406,11 @@ def run_mujoco(
             "joint_names": metadata["joint_names"],
         }
     except Exception as e:
-        print(f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}")
+        print(
+            f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}"
+        )
         raise e
-    
+
     run_mujoco(
         embodiment=args.embodiment,
         policy=policy,

From 87f0253fbdb5e1d02ea21f1e8aeb5a2bc272dac8 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 21:47:35 -0800
Subject: [PATCH 30/43] cycle time

---
 sim/model_export.py | 1 +
 sim/sim2sim.py      | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sim/model_export.py b/sim/model_export.py
index 68203b9d..e3ee0d4f 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -266,6 +266,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
         "clip_observations": cfg.clip_observations,
         "clip_actions": cfg.clip_actions,
         "joint_names": robot.joint_names(),
+        "cycle_time": cfg.cycle_time,
     }
 
     return (
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index b7688e28..3823d8c0 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -207,8 +207,6 @@ def run_mujoco(
             cur_pos_obs = q - default
             cur_vel_obs = dq
 
-            mm = q
-
             # Form input
             dof_pos = P.Value(
                 value_name="dof_pos",
@@ -244,6 +242,7 @@ def run_mujoco(
                     values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd],
                 ),
             )
+
             seconds = int(count_lowlevel * model_info["sim_dt"])
             nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9)
             timestamp = P.Value(
@@ -404,6 +403,7 @@ def run_mujoco(
             "sim_decimation": metadata["sim_decimation"],
             "tau_factor": metadata["tau_factor"],
             "joint_names": metadata["joint_names"],
+            "cycle_time": metadata["cycle_time"],
         }
     except Exception as e:
         print(

From 9cfde7f7072b28ec57ac6fd5d57dff9ab16ffb4c Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 22:06:57 -0800
Subject: [PATCH 31/43] noise to default

---
 sim/envs/base/legged_robot.py    | 11 +++++++++++
 sim/envs/humanoids/gpr_config.py |  1 +
 2 files changed, 12 insertions(+)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index cb3bdf60..fb89d649 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -81,6 +81,7 @@ def step(self, actions):
     def reset(self):
         """Reset all robots"""
         self.reset_idx(torch.arange(self.num_envs, device=self.device))
+        self._resample_default_positions()
         obs, privileged_obs, _, _, _ = self.step(
             torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)
         )
@@ -470,6 +471,16 @@ def update_command_curriculum(self, env_ids):
                 self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum
             )
 
+
+    def _resample_default_positions(self):
+        self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
+        for i in range(self.num_dofs):
+            name = self.dof_names[i]
+            self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
+
+            if self.add_noise:
+                self.default_dof_pos[i] *= self.cfg.noise.noise_scales.default_pos
+
     # ----------------------------------------
     def _init_buffers(self):
         """Initialize torch tensors which will contain simulation states and processed quantities"""
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 0bb526e4..3cdbf730 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -167,6 +167,7 @@ class noise_scales:
             lin_vel = 0.05
             quat = 0.03
             height_measurements = 0.1
+            default_pos = 0.05
 
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]

From bc56f7ec823254f28834d2b99c2242abefe77c70 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 23:34:00 -0800
Subject: [PATCH 32/43] change noise behavior

---
 sim/envs/base/legged_robot.py    | 2 +-
 sim/envs/humanoids/gpr_config.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index fb89d649..ed4b3461 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -479,7 +479,7 @@ def _resample_default_positions(self):
             self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
 
             if self.add_noise:
-                self.default_dof_pos[i] *= self.cfg.noise.noise_scales.default_pos
+                self.default_dof_pos[i] += torch.randn(1) * self.cfg.noise.noise_ranges.default_pos
 
     # ----------------------------------------
     def _init_buffers(self):
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 3cdbf730..e9d6989e 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -167,7 +167,9 @@ class noise_scales:
             lin_vel = 0.05
             quat = 0.03
             height_measurements = 0.1
-            default_pos = 0.05
+
+        class nosie_ranges:
+            default_pos = 0.05 # +- 0.05 rad
 
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]

From 3574ef1bbfa73d20e47e5b84d379d1c7be9e8677 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 23:34:35 -0800
Subject: [PATCH 33/43] less noise

---
 sim/envs/humanoids/gpr_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index e9d6989e..f8c0708b 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -169,7 +169,7 @@ class noise_scales:
             height_measurements = 0.1
 
         class nosie_ranges:
-            default_pos = 0.05 # +- 0.05 rad
+            default_pos = 0.03 # +- 0.05 rad
 
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]

From 63ed69427f4cb7fec870c04463ef0ce052d1bcd5 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Wed, 1 Jan 2025 23:37:36 -0800
Subject: [PATCH 34/43] fix noise

---
 sim/envs/base/legged_robot.py    | 2 +-
 sim/envs/humanoids/gpr_config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index ed4b3461..1cc6e5af 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -479,7 +479,7 @@ def _resample_default_positions(self):
             self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
 
             if self.add_noise:
-                self.default_dof_pos[i] += torch.randn(1) * self.cfg.noise.noise_ranges.default_pos
+                self.default_dof_pos[i] += torch.randn(1).item() * self.cfg.noise.noise_ranges.default_pos
 
     # ----------------------------------------
     def _init_buffers(self):
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index f8c0708b..9bd940d2 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -168,7 +168,7 @@ class noise_scales:
             quat = 0.03
             height_measurements = 0.1
 
-        class nosie_ranges:
+        class noise_ranges:
             default_pos = 0.03 # +- 0.05 rad
 
     class init_state(LeggedRobotCfg.init_state):

From d537864b4df19a2cfc762b02116ad09e52e8048e Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 01:03:34 -0800
Subject: [PATCH 35/43] lint

---
 sim/envs/base/legged_robot.py    | 1 -
 sim/envs/humanoids/gpr_config.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index 1cc6e5af..1b17e60b 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -471,7 +471,6 @@ def update_command_curriculum(self, env_ids):
                 self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum
             )
 
-
     def _resample_default_positions(self):
         self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
         for i in range(self.num_dofs):
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 9bd940d2..8099328d 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -169,7 +169,7 @@ class noise_scales:
             height_measurements = 0.1
 
         class noise_ranges:
-            default_pos = 0.03 # +- 0.05 rad
+            default_pos = 0.03  # +- 0.05 rad
 
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]

From 20add09d3eedd02aefbce6a8feb99f9e761d51f2 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 14:54:21 -0800
Subject: [PATCH 36/43] get name from schema

---
 sim/sim2sim.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 3823d8c0..c737521b 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -392,6 +392,12 @@ def run_mujoco(
     policy = ONNXModel(args.load_model)
     metadata = policy.attached_metadata
 
+    joint_names = []
+    for value_schema in policy.input_schema.values + policy.output_schema.values:
+        if value_schema.HasField("joint_positions"):
+            joint_names = list(value_schema.joint_positions.joint_names)
+            break
+
     try:
         model_info = {
             "num_actions": metadata["num_actions"],
@@ -402,7 +408,7 @@ def run_mujoco(
             "sim_dt": metadata["sim_dt"],
             "sim_decimation": metadata["sim_decimation"],
             "tau_factor": metadata["tau_factor"],
-            "joint_names": metadata["joint_names"],
+            "joint_names": joint_names,
             "cycle_time": metadata["cycle_time"],
         }
     except Exception as e:

From 07b9a9dde9835354dedef98a9af615109baed109 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 14:57:00 -0800
Subject: [PATCH 37/43] fix

---
 sim/envs/humanoids/gpr_config.py | 4 ++--
 sim/sim2sim.py                   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 8099328d..506591b6 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -6,6 +6,8 @@
     LeggedRobotCfgPPO,
 )
 from sim.resources.gpr.joints import Robot
+from kinfer import proto as P
+
 
 NUM_JOINTS = len(Robot.all_joints())
 
@@ -26,8 +28,6 @@ class env(LeggedRobotCfg.env):
         episode_length_s = 24  # episode length in seconds
         use_ref_actions = False
 
-        from kinfer import proto as P
-
         input_schema = P.IOSchema(
             values=[
                 P.ValueSchema(
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index c737521b..a32515e5 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -393,7 +393,7 @@ def run_mujoco(
     metadata = policy.attached_metadata
 
     joint_names = []
-    for value_schema in policy.input_schema.values + policy.output_schema.values:
+    for value_schema in policy.input_schema.values:
         if value_schema.HasField("joint_positions"):
             joint_names = list(value_schema.joint_positions.joint_names)
             break

From d37961d14569bf5d893fcbf9bddb6a88f41d2638 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 14:58:01 -0800
Subject: [PATCH 38/43] lint

---
 sim/envs/humanoids/gpr_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 506591b6..56ceaebc 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -1,13 +1,13 @@
 """Defines the environment configuration for the Getting up task"""
 
+from kinfer import proto as P
+
 from sim.env import robot_urdf_path
 from sim.envs.base.legged_robot_config import (  # type: ignore
     LeggedRobotCfg,
     LeggedRobotCfgPPO,
 )
 from sim.resources.gpr.joints import Robot
-from kinfer import proto as P
-
 
 NUM_JOINTS = len(Robot.all_joints())
 

From 34783c0ecc036beae52dc659c4cb57c7c30923d1 Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 14:59:30 -0800
Subject: [PATCH 39/43] more info from schema

---
 sim/sim2sim.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index a32515e5..8b846ce9 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -400,11 +400,11 @@ def run_mujoco(
 
     try:
         model_info = {
-            "num_actions": metadata["num_actions"],
+            "num_actions": len(joint_names),
             "num_observations": metadata["num_observations"],
-            "robot_effort": [metadata["robot_effort"][joint] for joint in metadata["joint_names"]],
-            "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in metadata["joint_names"]],
-            "robot_damping": [metadata["robot_damping"][joint] for joint in metadata["joint_names"]],
+            "robot_effort": [metadata["robot_effort"][joint] for joint in joint_names],
+            "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in joint_names],
+            "robot_damping": [metadata["robot_damping"][joint] for joint in joint_names],
             "sim_dt": metadata["sim_dt"],
             "sim_decimation": metadata["sim_decimation"],
             "tau_factor": metadata["tau_factor"],

From d3ca3d0d4278860d7c8a6111607f8d6c7cbbc05c Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 15:24:57 -0800
Subject: [PATCH 40/43] imu

---
 sim/resources/gpr/robot_fixed.urdf | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf
index 904e5ec3..20f2fff7 100644
--- a/sim/resources/gpr/robot_fixed.urdf
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -6,6 +6,22 @@
       <inertia ixx="0.000001" ixy="0" ixz="0" iyy="0.000001" iyz="0" izz="0.000001" />
     </inertial>
   </link>
+  <link name="imu_link">
+    <visual>
+      <origin xyz="0 0 0" rpy="0 0 0"/>
+      <geometry>
+        <box size="0.05 0.05 0.05"/>
+      </geometry>
+      <material name="imu_material">
+        <color rgba="1 0 0 0.5"/>
+      </material>
+    </visual>
+  </link>
+  <joint name="imu_joint" type="fixed">
+    <origin xyz="-0.008 -0.32 0" rpy="0 0 0"/>
+    <parent link="body1-part"/>
+    <child link="imu_link"/>
+  </joint>
   <joint name="floating_base" type="fixed">
     <origin rpy="-1.57 3.14 0" xyz="0 0 0" />
     <parent link="base" />
@@ -613,4 +629,4 @@
       <origin xyz="0.021026404502789243 -0.018472020400304597 -0.01930430937248649" rpy="0 0 0" />
     </inertial>
   </link>
-</robot>
\ No newline at end of file
+</robot>

From 570541873dea7755db7bfd7cd45f211590541b3a Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 15:30:59 -0800
Subject: [PATCH 41/43] imu

---
 sim/resources/gpr/robot_fixed.urdf | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf
index 20f2fff7..b16a8ba5 100644
--- a/sim/resources/gpr/robot_fixed.urdf
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -7,20 +7,26 @@
     </inertial>
   </link>
   <link name="imu_link">
-    <visual>
-      <origin xyz="0 0 0" rpy="0 0 0"/>
+      <visual>
       <geometry>
-        <box size="0.05 0.05 0.05"/>
+        <box size="0.3 0.01 0.01"/>
       </geometry>
-      <material name="imu_material">
-        <color rgba="1 0 0 0.5"/>
+      <material name="red">
+        <color rgba="1 0 0 1"/>
       </material>
     </visual>
   </link>
-  <joint name="imu_joint" type="fixed">
-    <origin xyz="-0.008 -0.32 0" rpy="0 0 0"/>
-    <parent link="body1-part"/>
-    <child link="imu_link"/>
+  <joint
+    name="imu_joint"
+    type="fixed"
+    dont_collapse="true">
+    <origin
+      xyz="0.03 0.0 -0.08"
+      rpy="0 0 0" />
+    <parent
+      link="base" />
+    <child
+      link="imu_link" />
   </joint>
   <joint name="floating_base" type="fixed">
     <origin rpy="-1.57 3.14 0" xyz="0 0 0" />

From cfb251331e8431e36435b6d93c0cac412f1fe93d Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 15:39:05 -0800
Subject: [PATCH 42/43] add imu missing warning

---
 sim/envs/base/legged_robot.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index 1b17e60b..2953ee52 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -801,6 +801,7 @@ def _create_envs(self):
         )
         if self.imu_indices == -1:
             self.imu_indices = None
+            print(f"Warning: IMU {self.cfg.asset.imu_name} not found in the asset. Defaulting to base link.")
 
         self.penalised_contact_indices = torch.zeros(
             len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False

From d37834d4e300856f544d5de4113d32f1e8d5d8cc Mon Sep 17 00:00:00 2001
From: Wesley Maa <wesley.maa@gmail.com>
Date: Thu, 2 Jan 2025 17:00:55 -0800
Subject: [PATCH 43/43] remove default randomization

---
 sim/envs/base/legged_robot.py    | 2 +-
 sim/envs/humanoids/gpr_config.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index 2953ee52..47edade7 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -81,7 +81,7 @@ def step(self, actions):
     def reset(self):
         """Reset all robots"""
         self.reset_idx(torch.arange(self.num_envs, device=self.device))
-        self._resample_default_positions()
+        # self._resample_default_positions()
         obs, privileged_obs, _, _, _ = self.step(
             torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)
         )
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 56ceaebc..128183f9 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -168,8 +168,9 @@ class noise_scales:
             quat = 0.03
             height_measurements = 0.1
 
-        class noise_ranges:
-            default_pos = 0.03  # +- 0.05 rad
+        # Currently unused
+        # class noise_ranges:
+        #     default_pos = 0.03  # +- 0.05 rad
 
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, Robot.height]