From bafb56fd84d2da56816ac676c18f16bb7d4c9792 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 16:40:28 -0800 Subject: [PATCH 01/43] kinfer module --- .gitmodules | 5 ++++- sim/sim2sim.py | 4 ++++ third_party/kinfer | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) create mode 160000 third_party/kinfer diff --git a/.gitmodules b/.gitmodules index 1d08b1ee..4e07ec9f 100755 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "third_party/isaacgym"] path = third_party/isaacgym - url = ../../kscalelabs/isaacgym.git \ No newline at end of file + url = ../../kscalelabs/isaacgym.git +[submodule "third_party/kinfer"] + path = third_party/kinfer + url = https://github.com/kscalelabs/kinfer.git diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 82ce1137..3a9bb669 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -17,8 +17,12 @@ import onnxruntime as ort import pygame import torch +# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 +import sys +sys.path.append('third_party/kinfer') from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel + from scipy.spatial.transform import Rotation as R from tqdm import tqdm diff --git a/third_party/kinfer b/third_party/kinfer new file mode 160000 index 00000000..acfa5f67 --- /dev/null +++ b/third_party/kinfer @@ -0,0 +1 @@ +Subproject commit acfa5f67aa26fedfc77c724358d3e4b35df7c365 From faa91193d5d1fe4f5bcff342de0cd8db70ec35ee Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 17:37:50 -0800 Subject: [PATCH 02/43] ppo schema --- .gitignore | 3 + sim/play.py | 135 ++++++++++++++++++++++++++++++++--------- sim/sim2sim.py | 5 +- sim/utils/resources.py | 9 +++ 4 files changed, 123 insertions(+), 29 deletions(-) create mode 100644 sim/utils/resources.py diff --git a/.gitignore b/.gitignore index d09d6354..194270e2 100755 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ wandb/ runs/ isaacgym/ *.h5 + +# dev +ref/ diff --git a/sim/play.py b/sim/play.py index 04465dd9..c65cfc90 100755 --- a/sim/play.py +++ b/sim/play.py @@ -18,9 +18,20 @@ import h5py import numpy as np from isaacgym import gymapi -from kinfer.export.pytorch import export_to_onnx from tqdm import tqdm +# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 +import sys + +# Get absolute path relative to this script +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer')) +sys.path.append(kinfer_path) + +from kinfer import proto as P +from kinfer.export.python import export_model + +from sim.utils.resources import load_embodiment + from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 @@ -55,7 +66,7 @@ def play(args: argparse.Namespace) -> None: else: env_cfg.terrain.mesh_type = "plane" env_cfg.terrain.num_rows = 5 - env_cfg.terrain.num_cols = 5 + env_cfg.terrain.num_cxols = 5 env_cfg.terrain.curriculum = False env_cfg.terrain.max_init_terrain_level = 5 env_cfg.noise.add_noise = True @@ -78,40 +89,108 @@ def play(args: argparse.Namespace) -> None: ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) policy = ppo_runner.get_inference_policy(device=env.device) + embodiment = ppo_runner.cfg["experiment_name"].lower() + # Export policy if needed if args.export_policy: path = os.path.join(".") export_policy_as_jit(ppo_runner.alg.actor_critic, path) print("Exported policy as jit script to: ", path) - # export policy as a onnx module (used to run it on web) - if args.export_onnx: - path = ppo_runner.load_path - embodiment = ppo_runner.cfg["experiment_name"].lower() - policy_cfg = ActorCfg( - embodiment=embodiment, - cycle_time=env_cfg.rewards.cycle_time, - sim_dt=env_cfg.sim.dt, - sim_decimation=env_cfg.control.decimation, - tau_factor=env_cfg.safety.torque_limit, - action_scale=env_cfg.control.action_scale, - lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel, - ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel, - quat_scale=env_cfg.normalization.obs_scales.quat, - dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos, - dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel, - frame_stack=env_cfg.env.frame_stack, - clip_observations=env_cfg.normalization.clip_observations, - clip_actions=env_cfg.normalization.clip_actions, - ) - actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg) - - # Merge policy_cfg and sim2sim_info into a single config object - export_config = {**vars(policy_cfg), **sim2sim_info} + # load the embodiment + robot = load_embodiment(embodiment) + JOINT_NAMES = robot.Robot.all_joints() + + input_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="observation_buffer", + state_tensor=P.StateTensorSchema( + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + # -1 comes from the fact that the latest observation is the current state and not part of the buffer + shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)], + description="Buffer of previous observations" + ) + ), + P.ValueSchema( + value_name="vector_command", + vector_command=P.VectorCommandSchema( + dimensions=3, # x_vel, y_vel, rot + ) + ), + P.ValueSchema( + value_name="timestamp", + timestamp=P.TimestampSchema( + unit=P.TimestampUnit.SECONDS, + description="Current policy time in seconds" + ) + ), + P.ValueSchema( + value_name="joint_positions", + joint_positions=P.JointPositionsSchema( + joint_names=JOINT_NAMES, + unit=P.JointPositionUnit.RADIANS, + ) + ), + P.ValueSchema( + value_name="joint_velocities", + joint_velocities=P.JointVelocitiesSchema( + joint_names=JOINT_NAMES, + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, + ) + ), + P.ValueSchema( + value_name="previous_actions", + joint_positions=P.JointPositionsSchema( + joint_names=JOINT_NAMES, + unit=P.JointPositionUnit.RADIANS + ) + ), + # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data + P.ValueSchema( + value_name="imu_angular_velocity", + imu=P.ImuSchema( + use_accelerometer=False, + use_gyroscope=True, + use_magnetometer=False, + ) + ), + P.ValueSchema( + value_name="imu_orientation", + imu=P.ImuSchema( + use_accelerometer=True, + use_gyroscope=False, + use_magnetometer=False, + ) + ) + ] + ) + + output_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="joint_positions", + joint_pos=P.JointPosSchema( + joint_names=JOINT_NAMES, + unit=P.JointPosUnit.RADIANS, + ), + ) + ] + ) - export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_policy.onnx") - print("Exported policy as kinfer-compatible onnx to: ", path) + # Create the full model schema + model_schema = P.ModelSchema( + input_schema=input_schema, + output_schema=output_schema + ) + if args.export_onnx: + kinfer_policy = export_model( + model=policy, + schema=model_schema, + ) + kinfer_policy.save_model("policy.kinfer") # Prepare for logging env_logger = Logger(env.dt) robot_index = 0 diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 3a9bb669..f47da5de 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -19,7 +19,10 @@ import torch # Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 import sys -sys.path.append('third_party/kinfer') + +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer')) +sys.path.append(kinfer_path) + from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel diff --git a/sim/utils/resources.py b/sim/utils/resources.py new file mode 100644 index 00000000..84aafb80 --- /dev/null +++ b/sim/utils/resources.py @@ -0,0 +1,9 @@ +import importlib +from typing import Any + +def load_embodiment(embodiment: str) -> Any: # noqa: ANN401 + # Dynamically import embodiment + module_name = f"sim.resources.{embodiment}.joints" + module = importlib.import_module(module_name) + robot = getattr(module, "Robot") + return robot From 6e4988da3ccca8afe2445966d434cf89879f170f Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 17:41:00 -0800 Subject: [PATCH 03/43] fix typo --- sim/play.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/play.py b/sim/play.py index c65cfc90..c1faac16 100755 --- a/sim/play.py +++ b/sim/play.py @@ -66,7 +66,7 @@ def play(args: argparse.Namespace) -> None: else: env_cfg.terrain.mesh_type = "plane" env_cfg.terrain.num_rows = 5 - env_cfg.terrain.num_cxols = 5 + env_cfg.terrain.num_cols = 5 env_cfg.terrain.curriculum = False env_cfg.terrain.max_init_terrain_level = 5 env_cfg.noise.add_noise = True From 983e010d542502e5b8eb0d2be1921c46ad2781d7 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 18:12:59 -0800 Subject: [PATCH 04/43] get jit --- sim/play.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sim/play.py b/sim/play.py index c1faac16..68d12d6c 100755 --- a/sim/play.py +++ b/sim/play.py @@ -48,10 +48,13 @@ def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: os.makedirs(path, exist_ok=True) path = os.path.join(path, "policy_1.pt") + model = get_actor_jit(actor_critic) + model.save(path) + +def get_actor_jit(actor_critic: Any) -> Any: model = copy.deepcopy(actor_critic.actor).to("cpu") traced_script_module = torch.jit.script(model) - traced_script_module.save(path) - + return traced_script_module def play(args: argparse.Namespace) -> None: logger.info("Configuring environment and training settings...") @@ -186,8 +189,9 @@ def play(args: argparse.Namespace) -> None: ) if args.export_onnx: + jit_policy = get_actor_jit(policy) kinfer_policy = export_model( - model=policy, + model=jit_policy, schema=model_schema, ) kinfer_policy.save_model("policy.kinfer") From b41d3df9064a009c6de0c917ddc298f736a43759 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 18:26:57 -0800 Subject: [PATCH 05/43] save --- sim/play.py | 3 ++- sim/requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sim/play.py b/sim/play.py index 68d12d6c..56e86633 100755 --- a/sim/play.py +++ b/sim/play.py @@ -19,6 +19,7 @@ import numpy as np from isaacgym import gymapi from tqdm import tqdm +import onnx # Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 import sys @@ -194,7 +195,7 @@ def play(args: argparse.Namespace) -> None: model=jit_policy, schema=model_schema, ) - kinfer_policy.save_model("policy.kinfer") + onnx.save(kinfer_policy, "policy.kinfer") # Prepare for logging env_logger = Logger(env.dt) robot_index = 0 diff --git a/sim/requirements.txt b/sim/requirements.txt index fbbdc4e6..ffac4a26 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -10,6 +10,7 @@ numpy==1.21.6 wandb tensorboard==2.14.0 onnxscript +onnx mujoco==2.3.6 kinfer==0.0.5 -opencv-python \ No newline at end of file +opencv-python From f627fb3ff327e54d26a99f353c6fd712667b68b9 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 18:27:59 -0800 Subject: [PATCH 06/43] lint --- sim/play.py | 50 ++++++++++++++++++------------------------ sim/sim2sim.py | 8 +++---- sim/utils/resources.py | 1 + 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/sim/play.py b/sim/play.py index 56e86633..d5b726bf 100755 --- a/sim/play.py +++ b/sim/play.py @@ -9,6 +9,9 @@ import logging import math import os + +# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 +import sys import time import uuid from datetime import datetime @@ -17,22 +20,17 @@ import cv2 import h5py import numpy as np +import onnx from isaacgym import gymapi from tqdm import tqdm -import onnx - -# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 -import sys # Get absolute path relative to this script -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer')) +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) sys.path.append(kinfer_path) -from kinfer import proto as P +from kinfer import proto as P from kinfer.export.python import export_model -from sim.utils.resources import load_embodiment - from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 @@ -40,6 +38,7 @@ from sim.model_export import ActorCfg, get_actor_policy from sim.utils.helpers import get_args # noqa: E402 from sim.utils.logger import Logger # noqa: E402 +from sim.utils.resources import load_embodiment import torch # special case with isort: skip comment @@ -52,11 +51,13 @@ def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> No model = get_actor_jit(actor_critic) model.save(path) + def get_actor_jit(actor_critic: Any) -> Any: model = copy.deepcopy(actor_critic.actor).to("cpu") traced_script_module = torch.jit.script(model) return traced_script_module + def play(args: argparse.Namespace) -> None: logger.info("Configuring environment and training settings...") env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) @@ -114,42 +115,36 @@ def play(args: argparse.Namespace) -> None: # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) # -1 comes from the fact that the latest observation is the current state and not part of the buffer shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)], - description="Buffer of previous observations" - ) + description="Buffer of previous observations", + ), ), P.ValueSchema( value_name="vector_command", vector_command=P.VectorCommandSchema( dimensions=3, # x_vel, y_vel, rot - ) + ), ), P.ValueSchema( value_name="timestamp", - timestamp=P.TimestampSchema( - unit=P.TimestampUnit.SECONDS, - description="Current policy time in seconds" - ) + timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"), ), P.ValueSchema( value_name="joint_positions", joint_positions=P.JointPositionsSchema( joint_names=JOINT_NAMES, unit=P.JointPositionUnit.RADIANS, - ) + ), ), P.ValueSchema( - value_name="joint_velocities", + value_name="joint_velocities", joint_velocities=P.JointVelocitiesSchema( joint_names=JOINT_NAMES, unit=P.JointVelocityUnit.RADIANS_PER_SECOND, - ) + ), ), P.ValueSchema( value_name="previous_actions", - joint_positions=P.JointPositionsSchema( - joint_names=JOINT_NAMES, - unit=P.JointPositionUnit.RADIANS - ) + joint_positions=P.JointPositionsSchema(joint_names=JOINT_NAMES, unit=P.JointPositionUnit.RADIANS), ), # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data P.ValueSchema( @@ -158,7 +153,7 @@ def play(args: argparse.Namespace) -> None: use_accelerometer=False, use_gyroscope=True, use_magnetometer=False, - ) + ), ), P.ValueSchema( value_name="imu_orientation", @@ -166,8 +161,8 @@ def play(args: argparse.Namespace) -> None: use_accelerometer=True, use_gyroscope=False, use_magnetometer=False, - ) - ) + ), + ), ] ) @@ -184,10 +179,7 @@ def play(args: argparse.Namespace) -> None: ) # Create the full model schema - model_schema = P.ModelSchema( - input_schema=input_schema, - output_schema=output_schema - ) + model_schema = P.ModelSchema(input_schema=input_schema, output_schema=output_schema) if args.export_onnx: jit_policy = get_actor_jit(policy) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index f47da5de..467113eb 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -7,6 +7,9 @@ import argparse import math import os + +# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 +import sys from copy import deepcopy from dataclasses import dataclass from typing import Dict, List, Tuple, Union @@ -17,15 +20,12 @@ import onnxruntime as ort import pygame import torch -# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 -import sys -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'kinfer')) +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) sys.path.append(kinfer_path) from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel - from scipy.spatial.transform import Rotation as R from tqdm import tqdm diff --git a/sim/utils/resources.py b/sim/utils/resources.py index 84aafb80..3d353f2c 100644 --- a/sim/utils/resources.py +++ b/sim/utils/resources.py @@ -1,6 +1,7 @@ import importlib from typing import Any + def load_embodiment(embodiment: str) -> Any: # noqa: ANN401 # Dynamically import embodiment module_name = f"sim.resources.{embodiment}.joints" From d36343784c5c1fae5affc8000893f222cd8fe077 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 20:35:30 -0800 Subject: [PATCH 07/43] move schema definition --- sim/envs/base/legged_robot_config.py | 9 ++++ sim/envs/humanoids/gpr_config.py | 71 ++++++++++++++++++++++++ sim/play.py | 81 ++-------------------------- 3 files changed, 83 insertions(+), 78 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index e87d2252..f0854b26 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -30,9 +30,16 @@ # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. from enum import Enum +import os from sim.envs.base.base_config import BaseConfig +import sys + +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) +sys.path.append(kinfer_path) +from kinfer import proto + class LeggedRobotCfg(BaseConfig): class env: @@ -44,6 +51,8 @@ class env: send_timeouts = True # send time out information to the algorithm episode_length_s = 20 # episode length in seconds + P = proto + class terrain: mesh_type = "trimesh" # "heightfield" # none, plane, heightfield or trimesh horizontal_scale = 0.1 # [m] diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index c59372d3..134c376c 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -26,6 +26,77 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False + P = LeggedRobotCfg.env.P + + input_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="observation_buffer", + state_tensor=P.StateTensorSchema( + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + # -1 comes from the fact that the latest observation is the current state and not part of the buffer + shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)], + description="Buffer of previous observations", + ), + ), + P.ValueSchema( + value_name="vector_command", + vector_command=P.VectorCommandSchema( + dimensions=3, # x_vel, y_vel, rot + ), + ), + P.ValueSchema( + value_name="timestamp", + timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"), + ), + P.ValueSchema( + value_name="joint_positions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.all_joints(), + unit=P.JointPositionUnit.RADIANS, + ), + ), + P.ValueSchema( + value_name="joint_velocities", + joint_velocities=P.JointVelocitiesSchema( + joint_names=Robot.all_joints(), + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, + ), + ), + P.ValueSchema( + value_name="previous_actions", + joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS), + ), + # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data + P.ValueSchema( + value_name="imu_angular_velocity", + imu=P.ImuSchema( + use_accelerometer=False, + use_gyroscope=True, + use_magnetometer=False, + ), + ), + P.ValueSchema( + value_name="imu_orientation", + imu=P.ImuSchema( + use_accelerometer=True, + use_gyroscope=False, + use_magnetometer=False, + ), + ), + ] + ) + + output_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="joint_positions", + joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS), + ) + ] + ) + class safety(LeggedRobotCfg.safety): # safety factors pos_limit = 1.0 diff --git a/sim/play.py b/sim/play.py index d5b726bf..aa1041ab 100755 --- a/sim/play.py +++ b/sim/play.py @@ -94,92 +94,17 @@ def play(args: argparse.Namespace) -> None: ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) policy = ppo_runner.get_inference_policy(device=env.device) - embodiment = ppo_runner.cfg["experiment_name"].lower() - # Export policy if needed if args.export_policy: path = os.path.join(".") export_policy_as_jit(ppo_runner.alg.actor_critic, path) print("Exported policy as jit script to: ", path) - # load the embodiment - robot = load_embodiment(embodiment) - JOINT_NAMES = robot.Robot.all_joints() - - input_schema = P.IOSchema( - values=[ - P.ValueSchema( - value_name="observation_buffer", - state_tensor=P.StateTensorSchema( - # 11 is the number of single observation features - 6 from IMU, 5 from command input - # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) - # -1 comes from the fact that the latest observation is the current state and not part of the buffer - shape=[(env_cfg.env.frame_stack - 1) * (11 + env.num_dof * 3)], - description="Buffer of previous observations", - ), - ), - P.ValueSchema( - value_name="vector_command", - vector_command=P.VectorCommandSchema( - dimensions=3, # x_vel, y_vel, rot - ), - ), - P.ValueSchema( - value_name="timestamp", - timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"), - ), - P.ValueSchema( - value_name="joint_positions", - joint_positions=P.JointPositionsSchema( - joint_names=JOINT_NAMES, - unit=P.JointPositionUnit.RADIANS, - ), - ), - P.ValueSchema( - value_name="joint_velocities", - joint_velocities=P.JointVelocitiesSchema( - joint_names=JOINT_NAMES, - unit=P.JointVelocityUnit.RADIANS_PER_SECOND, - ), - ), - P.ValueSchema( - value_name="previous_actions", - joint_positions=P.JointPositionsSchema(joint_names=JOINT_NAMES, unit=P.JointPositionUnit.RADIANS), - ), - # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data - P.ValueSchema( - value_name="imu_angular_velocity", - imu=P.ImuSchema( - use_accelerometer=False, - use_gyroscope=True, - use_magnetometer=False, - ), - ), - P.ValueSchema( - value_name="imu_orientation", - imu=P.ImuSchema( - use_accelerometer=True, - use_gyroscope=False, - use_magnetometer=False, - ), - ), - ] - ) - - output_schema = P.IOSchema( - values=[ - P.ValueSchema( - value_name="joint_positions", - joint_pos=P.JointPosSchema( - joint_names=JOINT_NAMES, - unit=P.JointPosUnit.RADIANS, - ), - ) - ] - ) + if env_cfg.env.input_schema is None or env_cfg.env.output_schema is None: + raise ValueError("Input or output schema is None") # Create the full model schema - model_schema = P.ModelSchema(input_schema=input_schema, output_schema=output_schema) + model_schema = P.ModelSchema(input_schema=env_cfg.env.input_schema, output_schema=env_cfg.env.output_schema) if args.export_onnx: jit_policy = get_actor_jit(policy) From 9942d79f9ff4fd2f576953e5310f002daeff01c9 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sun, 29 Dec 2024 20:36:23 -0800 Subject: [PATCH 08/43] fmt --- sim/envs/base/legged_robot_config.py | 5 +- sim/envs/humanoids/gpr_config.py | 104 ++++++++++++++------------- 2 files changed, 57 insertions(+), 52 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index f0854b26..44a747af 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -29,13 +29,12 @@ # # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. -from enum import Enum import os +import sys +from enum import Enum from sim.envs.base.base_config import BaseConfig -import sys - kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) sys.path.append(kinfer_path) from kinfer import proto diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 134c376c..cf569de3 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -32,67 +32,73 @@ class env(LeggedRobotCfg.env): values=[ P.ValueSchema( value_name="observation_buffer", - state_tensor=P.StateTensorSchema( - # 11 is the number of single observation features - 6 from IMU, 5 from command input - # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) - # -1 comes from the fact that the latest observation is the current state and not part of the buffer - shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)], - description="Buffer of previous observations", - ), + state_tensor=P.StateTensorSchema( + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + # -1 comes from the fact that the latest observation is the current state and not part of the buffer + shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)], + description="Buffer of previous observations", ), - P.ValueSchema( - value_name="vector_command", - vector_command=P.VectorCommandSchema( - dimensions=3, # x_vel, y_vel, rot - ), + ), + P.ValueSchema( + value_name="vector_command", + vector_command=P.VectorCommandSchema( + dimensions=3, # x_vel, y_vel, rot ), - P.ValueSchema( - value_name="timestamp", - timestamp=P.TimestampSchema(unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds"), + ), + P.ValueSchema( + value_name="timestamp", + timestamp=P.TimestampSchema( + unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds" ), - P.ValueSchema( - value_name="joint_positions", - joint_positions=P.JointPositionsSchema( - joint_names=Robot.all_joints(), - unit=P.JointPositionUnit.RADIANS, - ), + ), + P.ValueSchema( + value_name="joint_positions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.all_joints(), + unit=P.JointPositionUnit.RADIANS, ), - P.ValueSchema( - value_name="joint_velocities", - joint_velocities=P.JointVelocitiesSchema( - joint_names=Robot.all_joints(), - unit=P.JointVelocityUnit.RADIANS_PER_SECOND, - ), + ), + P.ValueSchema( + value_name="joint_velocities", + joint_velocities=P.JointVelocitiesSchema( + joint_names=Robot.all_joints(), + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, ), - P.ValueSchema( - value_name="previous_actions", - joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS), + ), + P.ValueSchema( + value_name="previous_actions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS ), - # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data - P.ValueSchema( - value_name="imu_angular_velocity", - imu=P.ImuSchema( - use_accelerometer=False, - use_gyroscope=True, - use_magnetometer=False, - ), + ), + # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data + P.ValueSchema( + value_name="imu_angular_velocity", + imu=P.ImuSchema( + use_accelerometer=False, + use_gyroscope=True, + use_magnetometer=False, ), - P.ValueSchema( - value_name="imu_orientation", - imu=P.ImuSchema( - use_accelerometer=True, - use_gyroscope=False, - use_magnetometer=False, - ), + ), + P.ValueSchema( + value_name="imu_orientation", + imu=P.ImuSchema( + use_accelerometer=True, + use_gyroscope=False, + use_magnetometer=False, ), - ] - ) - + ), + ] + ) + output_schema = P.IOSchema( values=[ P.ValueSchema( value_name="joint_positions", - joint_positions=P.JointPositionsSchema(joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS), + joint_positions=P.JointPositionsSchema( + joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS + ), ) ] ) From 64d42c11dd89692cbc3c018d4c93d9f26da3cff3 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 12:45:06 -0800 Subject: [PATCH 09/43] fix schema --- sim/envs/humanoids/gpr_config.py | 42 ++++++++++++++++++++------------ sim/model_export.py | 6 ++--- sim/play.py | 19 ++++++++++++++- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index cf569de3..658f0dc3 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -30,16 +30,6 @@ class env(LeggedRobotCfg.env): input_schema = P.IOSchema( values=[ - P.ValueSchema( - value_name="observation_buffer", - state_tensor=P.StateTensorSchema( - # 11 is the number of single observation features - 6 from IMU, 5 from command input - # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) - # -1 comes from the fact that the latest observation is the current state and not part of the buffer - shape=[(frame_stack - 1) * (11 + NUM_JOINTS * 3)], - description="Buffer of previous observations", - ), - ), P.ValueSchema( value_name="vector_command", vector_command=P.VectorCommandSchema( @@ -47,7 +37,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="timestamp", + value_name="t", timestamp=P.TimestampSchema( unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds" ), @@ -74,7 +64,7 @@ class env(LeggedRobotCfg.env): ), # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data P.ValueSchema( - value_name="imu_angular_velocity", + value_name="imu_ang_vel", imu=P.ImuSchema( use_accelerometer=False, use_gyroscope=True, @@ -82,24 +72,46 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="imu_orientation", + value_name="imu_euler_xyz", imu=P.ImuSchema( use_accelerometer=True, use_gyroscope=False, use_magnetometer=False, ), ), + P.ValueSchema( + value_name="buffer", + state_tensor=P.StateTensorSchema( + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + shape=[frame_stack * (11 + NUM_JOINTS * 3)], + description="Buffer of previous observations", + ), + ), ] ) output_schema = P.IOSchema( values=[ P.ValueSchema( - value_name="joint_positions", + value_name="actions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS + ), + ), + P.ValueSchema( + value_name="actions_raw", joint_positions=P.JointPositionsSchema( joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS ), - ) + ), + P.ValueSchema( + value_name="buffer", + state_tensor=P.StateTensorSchema( + shape=[frame_stack * (11 + NUM_JOINTS * 3)], + description="Updated buffer of observations", + ), + ), ] ) diff --git a/sim/model_export.py b/sim/model_export.py index 3777124a..64362b67 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -125,9 +125,7 @@ def get_init_buffer(self) -> Tensor: def forward( self, - x_vel: Tensor, # x-coordinate of the target velocity - y_vel: Tensor, # y-coordinate of the target velocity - rot: Tensor, # target angular velocity + vector_command: Tensor, # (x_vel, y_vel, rot) t: Tensor, # current policy time (sec) dof_pos: Tensor, # current angular position of the DoFs relative to default dof_vel: Tensor, # current angular velocity of the DoFs @@ -163,6 +161,8 @@ def forward( sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time) cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time) + x_vel, y_vel, rot = vector_command.split(1) + # Construct command input command_input = torch.cat( ( diff --git a/sim/play.py b/sim/play.py index aa1041ab..e905fcd6 100755 --- a/sim/play.py +++ b/sim/play.py @@ -107,10 +107,27 @@ def play(args: argparse.Namespace) -> None: model_schema = P.ModelSchema(input_schema=env_cfg.env.input_schema, output_schema=env_cfg.env.output_schema) if args.export_onnx: - jit_policy = get_actor_jit(policy) + actor_cfg = ActorCfg( + embodiment=ppo_runner.cfg["experiment_name"].lower(), + cycle_time=env_cfg.rewards.cycle_time, + sim_dt=env_cfg.sim.dt, + sim_decimation=env_cfg.control.decimation, + tau_factor=env_cfg.safety.torque_limit, + action_scale=env_cfg.control.action_scale, + lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel, + ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel, + quat_scale=env_cfg.normalization.obs_scales.quat, + dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos, + dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel, + frame_stack=env_cfg.env.frame_stack, + clip_observations=env_cfg.normalization.clip_observations, + clip_actions=env_cfg.normalization.clip_actions, + ) + jit_policy, metadata, _ = get_actor_policy(ppo_runner.load_path, actor_cfg) kinfer_policy = export_model( model=jit_policy, schema=model_schema, + metadata=metadata, ) onnx.save(kinfer_policy, "policy.kinfer") # Prepare for logging From 3f04f24120b37bf818bc337d4357f0903d152b5c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 14:38:01 -0800 Subject: [PATCH 10/43] minor fixes --- sim/envs/humanoids/gpr_config.py | 4 +--- sim/play.py | 2 +- sim/utils/helpers.py | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 658f0dc3..680f4626 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -39,7 +39,7 @@ class env(LeggedRobotCfg.env): P.ValueSchema( value_name="t", timestamp=P.TimestampSchema( - unit=P.TimestampUnit.SECONDS, description="Current policy time in seconds" + start_seconds=0, ), ), P.ValueSchema( @@ -85,7 +85,6 @@ class env(LeggedRobotCfg.env): # 11 is the number of single observation features - 6 from IMU, 5 from command input # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) shape=[frame_stack * (11 + NUM_JOINTS * 3)], - description="Buffer of previous observations", ), ), ] @@ -109,7 +108,6 @@ class env(LeggedRobotCfg.env): value_name="buffer", state_tensor=P.StateTensorSchema( shape=[frame_stack * (11 + NUM_JOINTS * 3)], - description="Updated buffer of observations", ), ), ] diff --git a/sim/play.py b/sim/play.py index e905fcd6..71579476 100755 --- a/sim/play.py +++ b/sim/play.py @@ -29,7 +29,7 @@ sys.path.append(kinfer_path) from kinfer import proto as P -from kinfer.export.python import export_model +from kinfer.export.pytorch import export_model from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py index 2724896f..e985cb72 100755 --- a/sim/utils/helpers.py +++ b/sim/utils/helpers.py @@ -47,7 +47,8 @@ def class_to_dict(obj) -> dict: return obj result = {} for key in dir(obj): - if key.startswith("_"): + # Skip private attributes and known problematic ones + if key.startswith("_") or key in ["P", "input_schema", "output_schema"]: continue element = [] val = getattr(obj, key) From 8f904caed6f2844a1dc40ee42ee5e775cce2b37f Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 14:39:21 -0800 Subject: [PATCH 11/43] fix names --- sim/envs/humanoids/gpr_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 680f4626..9dc62731 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -43,21 +43,21 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="joint_positions", + value_name="dof_pos", joint_positions=P.JointPositionsSchema( joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS, ), ), P.ValueSchema( - value_name="joint_velocities", + value_name="dof_vel", joint_velocities=P.JointVelocitiesSchema( joint_names=Robot.all_joints(), unit=P.JointVelocityUnit.RADIANS_PER_SECOND, ), ), P.ValueSchema( - value_name="previous_actions", + value_name="prev_actions", joint_positions=P.JointPositionsSchema( joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS ), From 8f120bb94e571836191597f51dbfa991bca0fc2c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 15:07:58 -0800 Subject: [PATCH 12/43] tensors --- sim/envs/humanoids/gpr_config.py | 2 ++ sim/model_export.py | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 9dc62731..24611fd3 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -85,6 +85,7 @@ class env(LeggedRobotCfg.env): # 11 is the number of single observation features - 6 from IMU, 5 from command input # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) shape=[frame_stack * (11 + NUM_JOINTS * 3)], + dtype=P.DType.FP32, ), ), ] @@ -108,6 +109,7 @@ class env(LeggedRobotCfg.env): value_name="buffer", state_tensor=P.StateTensorSchema( shape=[frame_stack * (11 + NUM_JOINTS * 3)], + dtype=P.DType.FP32, ), ), ] diff --git a/sim/model_export.py b/sim/model_export.py index 64362b67..bc53a602 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass, fields from io import BytesIO -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import onnx import onnxruntime as ort @@ -133,7 +133,7 @@ def forward( imu_ang_vel: Tensor, # angular velocity of the IMU imu_euler_xyz: Tensor, # euler angles of the IMU buffer: Tensor, # buffer of previous observations - ) -> Tuple[Tensor, Tensor, Tensor]: + ) -> Dict[str, Tensor]: """Runs the actor model forward pass. Args: @@ -206,7 +206,9 @@ def forward( actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale - return actions_scaled, actions, x + return {"actions": actions_scaled, + "actions_raw": actions, + "buffer": x} def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: From 31dd3790f36ecc66f1adc71173047ece3fc4c780 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 15:09:22 -0800 Subject: [PATCH 13/43] squeeze --- sim/model_export.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sim/model_export.py b/sim/model_export.py index bc53a602..2c645c63 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -186,8 +186,8 @@ def forward( q, dq, prev_actions, - imu_ang_vel * self.ang_vel_scale, - imu_euler_xyz * self.quat_scale, + imu_ang_vel.squeeze(0) * self.ang_vel_scale, + imu_euler_xyz.squeeze(0) * self.quat_scale, ), dim=0, ) From 7a0a1c394866d28fe756c062de76c7fb23595803 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 15:17:53 -0800 Subject: [PATCH 14/43] fix imports --- sim/envs/base/legged_robot_config.py | 6 +----- sim/envs/humanoids/gpr_config.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index 44a747af..e427c99f 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -35,10 +35,8 @@ from sim.envs.base.base_config import BaseConfig -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) +kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer")) sys.path.append(kinfer_path) -from kinfer import proto - class LeggedRobotCfg(BaseConfig): class env: @@ -50,8 +48,6 @@ class env: send_timeouts = True # send time out information to the algorithm episode_length_s = 20 # episode length in seconds - P = proto - class terrain: mesh_type = "trimesh" # "heightfield" # none, plane, heightfield or trimesh horizontal_scale = 0.1 # [m] diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 24611fd3..78637d03 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -26,7 +26,7 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False - P = LeggedRobotCfg.env.P + from kinfer import proto as P input_schema = P.IOSchema( values=[ From a289ee8c0eaeeec412dbc142fb2abe3b90b16276 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 15:25:50 -0800 Subject: [PATCH 15/43] pin 3.8 compatible kinfer --- third_party/kinfer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/kinfer b/third_party/kinfer index acfa5f67..13e24b7b 160000 --- a/third_party/kinfer +++ b/third_party/kinfer @@ -1 +1 @@ -Subproject commit acfa5f67aa26fedfc77c724358d3e4b35df7c365 +Subproject commit 13e24b7bc5104dc6be8a6e49c95912ab7c80aaea From 26466b809612dee7486b283484028b97dbe5f66d Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 15:28:52 -0800 Subject: [PATCH 16/43] lint --- sim/envs/base/legged_robot_config.py | 1 + sim/model_export.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index e427c99f..5e8b7a7c 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -38,6 +38,7 @@ kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer")) sys.path.append(kinfer_path) + class LeggedRobotCfg(BaseConfig): class env: num_envs = 4096 diff --git a/sim/model_export.py b/sim/model_export.py index 2c645c63..a7d384c0 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -137,9 +137,7 @@ def forward( """Runs the actor model forward pass. Args: - x_vel: The x-coordinate of the target velocity, with shape (1). - y_vel: The y-coordinate of the target velocity, with shape (1). - rot: The target angular velocity, with shape (1). + vector_command: The target velocity vector, with shape (3). It consistes of x_vel, y_vel, and rot. t: The current policy time step, with shape (1). dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions). dof_vel: The current angular velocity of the DoFs, with shape (num_actions). @@ -206,9 +204,7 @@ def forward( actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale - return {"actions": actions_scaled, - "actions_raw": actions, - "buffer": x} + return {"actions": actions_scaled, "actions_raw": actions, "buffer": x} def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: From d041f50a0a2c3d32d62b691711674398a012768d Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 21:16:09 -0800 Subject: [PATCH 17/43] joints --- sim/resources/gpr/joints.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index 3c4dd5d5..f3e937ee 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -155,6 +155,16 @@ def stiffness(cls) -> Dict[str, float]: "ankle_y": 40, } + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + breakpoint() + for side in ["L", "R"]: + for joint, value in stiffness.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # d_gains @classmethod def damping(cls) -> Dict[str, float]: From 4a608b03325a9fcf588f0ccb9201b2188f93302a Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 21:59:45 -0800 Subject: [PATCH 18/43] update local kinfer --- third_party/kinfer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/kinfer b/third_party/kinfer index 13e24b7b..e05bfab1 160000 --- a/third_party/kinfer +++ b/third_party/kinfer @@ -1 +1 @@ -Subproject commit 13e24b7bc5104dc6be8a6e49c95912ab7c80aaea +Subproject commit e05bfab1cde94e2ff607b3dff7a5604ef0edcd39 From 07a9b242ee14e6fcdb1a9c8ed675597c39587abf Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 30 Dec 2024 22:02:29 -0800 Subject: [PATCH 19/43] update metadata + path --- sim/envs/base/legged_robot_config.py | 4 --- sim/model_export.py | 39 +++++++++++++++++++--------- sim/play.py | 4 --- sim/resources/gpr/joints.py | 19 +++++++++++++- sim/sim2sim.py | 3 --- 5 files changed, 45 insertions(+), 24 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index 5e8b7a7c..265ff995 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -35,10 +35,6 @@ from sim.envs.base.base_config import BaseConfig -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "third_party", "kinfer")) -sys.path.append(kinfer_path) - - class LeggedRobotCfg(BaseConfig): class env: num_envs = 4096 diff --git a/sim/model_export.py b/sim/model_export.py index a7d384c0..26d2a4d7 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -237,24 +237,39 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer) # Add sim2sim metadata - robot_effort = list(a_model.robot.effort().values()) - robot_stiffness = list(a_model.robot.stiffness().values()) - robot_damping = list(a_model.robot.damping().values()) + robot = a_model.robot + robot_effort = robot.effort_mapping() + robot_stiffness = robot.stiffness_mapping() + robot_damping = robot.damping_mapping() num_actions = a_model.num_actions num_observations = a_model.num_observations - default_standing = list(a_model.robot.default_standing().values()) + default_standing = robot.default_standing() + + metadata = { + "num_actions": num_actions, + "num_observations": num_observations, + "robot_effort": robot_effort, + "robot_stiffness": robot_stiffness, + "robot_damping": robot_damping, + "default_standing": default_standing, + "sim_dt": cfg.sim_dt, + "sim_decimation": cfg.sim_decimation, + "tau_factor": cfg.tau_factor, + "action_scale": cfg.action_scale, + "lin_vel_scale": cfg.lin_vel_scale, + "ang_vel_scale": cfg.ang_vel_scale, + "quat_scale": cfg.quat_scale, + "dof_pos_scale": cfg.dof_pos_scale, + "dof_vel_scale": cfg.dof_vel_scale, + "frame_stack": cfg.frame_stack, + "clip_observations": cfg.clip_observations, + "clip_actions": cfg.clip_actions, + } return ( a_model, - { - "robot_effort": robot_effort, - "robot_stiffness": robot_stiffness, - "robot_damping": robot_damping, - "num_actions": num_actions, - "num_observations": num_observations, - "default_standing": default_standing, - }, + metadata, input_tensors, ) diff --git a/sim/play.py b/sim/play.py index 71579476..009019fc 100755 --- a/sim/play.py +++ b/sim/play.py @@ -24,10 +24,6 @@ from isaacgym import gymapi from tqdm import tqdm -# Get absolute path relative to this script -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) -sys.path.append(kinfer_path) - from kinfer import proto as P from kinfer.export.pytorch import export_model diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index f3e937ee..971f2733 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -159,7 +159,6 @@ def stiffness(cls) -> Dict[str, float]: def stiffness_mapping(cls) -> Dict[str, float]: mapping = {} stiffness = cls.stiffness() - breakpoint() for side in ["L", "R"]: for joint, value in stiffness.items(): mapping[f"{side}_{joint}"] = value @@ -176,6 +175,15 @@ def damping(cls) -> Dict[str, float]: "ankle_y": 5, } + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for side in ["L", "R"]: + for joint, value in damping.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # effort_limits @classmethod def effort(cls) -> Dict[str, float]: @@ -187,6 +195,15 @@ def effort(cls) -> Dict[str, float]: "ankle_y": 17, } + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for side in ["L", "R"]: + for joint, value in effort.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # vel_limits @classmethod def velocity(cls) -> Dict[str, float]: diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 467113eb..a5dad736 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -21,9 +21,6 @@ import pygame import torch -kinfer_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "third_party", "kinfer")) -sys.path.append(kinfer_path) - from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel from scipy.spatial.transform import Rotation as R From 262198d406bb90ecd9731a126c980391e4550007 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 31 Dec 2024 01:41:29 -0800 Subject: [PATCH 20/43] clean and rename --- sim/envs/humanoids/gpr_config.py | 4 ++-- sim/model_export.py | 6 +++--- sim/play.py | 4 ---- sim/sim2sim.py | 2 -- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 78637d03..a75b0ec1 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -80,7 +80,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="buffer", + value_name="state_tensor", state_tensor=P.StateTensorSchema( # 11 is the number of single observation features - 6 from IMU, 5 from command input # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) @@ -106,7 +106,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="buffer", + value_name="state_tensor", state_tensor=P.StateTensorSchema( shape=[frame_stack * (11 + NUM_JOINTS * 3)], dtype=P.DType.FP32, diff --git a/sim/model_export.py b/sim/model_export.py index 26d2a4d7..60a4ceaa 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -132,7 +132,7 @@ def forward( prev_actions: Tensor, # previous actions taken by the model imu_ang_vel: Tensor, # angular velocity of the IMU imu_euler_xyz: Tensor, # euler angles of the IMU - buffer: Tensor, # buffer of previous observations + state_tensor: Tensor, # buffer of previous observations ) -> Dict[str, Tensor]: """Runs the actor model forward pass. @@ -194,7 +194,7 @@ def forward( new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations) # Add the new frame to the buffer - x = torch.cat((buffer, new_x), dim=0) + x = torch.cat((state_tensor, new_x), dim=0) # Pop the oldest frame x = x[self.num_single_obs :] @@ -204,7 +204,7 @@ def forward( actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale - return {"actions": actions_scaled, "actions_raw": actions, "buffer": x} + return {"actions": actions_scaled, "actions_raw": actions, "state_tensor": x} def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: diff --git a/sim/play.py b/sim/play.py index 009019fc..955d094a 100755 --- a/sim/play.py +++ b/sim/play.py @@ -10,10 +10,6 @@ import math import os -# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 -import sys -import time -import uuid from datetime import datetime from typing import Any, Union diff --git a/sim/sim2sim.py b/sim/sim2sim.py index a5dad736..b21507cc 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -8,8 +8,6 @@ import math import os -# Add local kinfer to the path because kinfer requires 3.11 and we are using 3.8 -import sys from copy import deepcopy from dataclasses import dataclass from typing import Dict, List, Tuple, Union From 0c583dbd19988f07e5c32230899eff2694c0bc58 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 31 Dec 2024 01:43:25 -0800 Subject: [PATCH 21/43] lint --- sim/envs/base/legged_robot_config.py | 1 + sim/model_export.py | 2 +- sim/play.py | 4 +--- sim/sim2sim.py | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index 265ff995..71428b71 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -35,6 +35,7 @@ from sim.envs.base.base_config import BaseConfig + class LeggedRobotCfg(BaseConfig): class env: num_envs = 4096 diff --git a/sim/model_export.py b/sim/model_export.py index 60a4ceaa..d7a93abf 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -147,7 +147,7 @@ def forward( imu_euler_xyz: The euler angles of the IMU, with shape (3), in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used, can be all zeros. - buffer: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is + state_tensor: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is the return value of the previous forward pass. On the first pass, it should be all zeros. diff --git a/sim/play.py b/sim/play.py index 955d094a..1726cf76 100755 --- a/sim/play.py +++ b/sim/play.py @@ -9,7 +9,6 @@ import logging import math import os - from datetime import datetime from typing import Any, Union @@ -18,10 +17,9 @@ import numpy as np import onnx from isaacgym import gymapi -from tqdm import tqdm - from kinfer import proto as P from kinfer.export.pytorch import export_model +from tqdm import tqdm from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 diff --git a/sim/sim2sim.py b/sim/sim2sim.py index b21507cc..82ce1137 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -7,7 +7,6 @@ import argparse import math import os - from copy import deepcopy from dataclasses import dataclass from typing import Dict, List, Tuple, Union @@ -18,7 +17,6 @@ import onnxruntime as ort import pygame import torch - from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel from scipy.spatial.transform import Rotation as R From ce4d7291cb81a029d5107f9f40b01da131b98310 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 31 Dec 2024 12:55:29 -0800 Subject: [PATCH 22/43] rename --- sim/envs/humanoids/gpr_config.py | 2 +- sim/model_export.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index a75b0ec1..c867139f 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -37,7 +37,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="t", + value_name="timestamp", timestamp=P.TimestampSchema( start_seconds=0, ), diff --git a/sim/model_export.py b/sim/model_export.py index d7a93abf..b8a1bbc0 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -126,7 +126,7 @@ def get_init_buffer(self) -> Tensor: def forward( self, vector_command: Tensor, # (x_vel, y_vel, rot) - t: Tensor, # current policy time (sec) + timestamp: Tensor, # current policy time (sec) dof_pos: Tensor, # current angular position of the DoFs relative to default dof_vel: Tensor, # current angular velocity of the DoFs prev_actions: Tensor, # previous actions taken by the model @@ -138,7 +138,7 @@ def forward( Args: vector_command: The target velocity vector, with shape (3). It consistes of x_vel, y_vel, and rot. - t: The current policy time step, with shape (1). + timestamp: The current policy time step, with shape (1). dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions). dof_vel: The current angular velocity of the DoFs, with shape (num_actions). prev_actions: The previous actions taken by the model, with shape (num_actions). @@ -156,8 +156,8 @@ def forward( actions: The actions to take, with shape (num_actions). x: The new buffer of observations, with shape (frame_stack * num_single_obs). """ - sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time) - cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time) + sin_pos = torch.sin(2 * torch.pi * timestamp / self.cycle_time) + cos_pos = torch.cos(2 * torch.pi * timestamp / self.cycle_time) x_vel, y_vel, rot = vector_command.split(1) From 9b0738ab2da28b0ba8726bafa38e8f83ea6b9853 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 14:06:39 -0800 Subject: [PATCH 23/43] push kinfer --- third_party/kinfer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/kinfer b/third_party/kinfer index e05bfab1..83cff064 160000 --- a/third_party/kinfer +++ b/third_party/kinfer @@ -1 +1 @@ -Subproject commit e05bfab1cde94e2ff607b3dff7a5604ef0edcd39 +Subproject commit 83cff06425acc31746a3f23ae219a353c6d49306 From aff40731eb7fac86514cf33b7b3fd71b1c169a10 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 14:24:39 -0800 Subject: [PATCH 24/43] new names --- sim/envs/humanoids/gpr_config.py | 4 ++-- sim/model_export.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index c867139f..f6f5b645 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -80,7 +80,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="state_tensor", + value_name="hist_obs", state_tensor=P.StateTensorSchema( # 11 is the number of single observation features - 6 from IMU, 5 from command input # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) @@ -106,7 +106,7 @@ class env(LeggedRobotCfg.env): ), ), P.ValueSchema( - value_name="state_tensor", + value_name="new_x", state_tensor=P.StateTensorSchema( shape=[frame_stack * (11 + NUM_JOINTS * 3)], dtype=P.DType.FP32, diff --git a/sim/model_export.py b/sim/model_export.py index b8a1bbc0..b57003cf 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -132,7 +132,7 @@ def forward( prev_actions: Tensor, # previous actions taken by the model imu_ang_vel: Tensor, # angular velocity of the IMU imu_euler_xyz: Tensor, # euler angles of the IMU - state_tensor: Tensor, # buffer of previous observations + hist_obs: Tensor, # buffer of previous observations ) -> Dict[str, Tensor]: """Runs the actor model forward pass. @@ -194,7 +194,7 @@ def forward( new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations) # Add the new frame to the buffer - x = torch.cat((state_tensor, new_x), dim=0) + x = torch.cat((hist_obs, new_x), dim=0) # Pop the oldest frame x = x[self.num_single_obs :] @@ -204,7 +204,7 @@ def forward( actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale - return {"actions": actions_scaled, "actions_raw": actions, "state_tensor": x} + return {"actions": actions_scaled, "actions_raw": actions, "new_x": x} def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: @@ -265,6 +265,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T "frame_stack": cfg.frame_stack, "clip_observations": cfg.clip_observations, "clip_actions": cfg.clip_actions, + "joint_names": robot.all_joints(), } return ( From ac6a76c695aa56f65eeeffa4bdaefe946249056c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 14:31:11 -0800 Subject: [PATCH 25/43] policy runs --- sim/sim2sim.py | 196 +++++++++++++++++++++++++++++++++++---------- third_party/kinfer | 2 +- 2 files changed, 153 insertions(+), 45 deletions(-) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 82ce1137..946b9f2e 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -17,8 +17,9 @@ import onnxruntime as ort import pygame import torch -from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel +from kinfer.serialize.numpy import NumpyMultiSerializer +from kinfer import proto as P from scipy.spatial.transform import Rotation as R from tqdm import tqdm @@ -92,9 +93,10 @@ def pd_control( return kp * (target_q + default - q) - kd * dq + def run_mujoco( embodiment: str, - policy: ort.InferenceSession, + policy: ONNXModel, model_info: Dict[str, Union[float, List[float], str]], keyboard_use: bool = False, log_h5: bool = False, @@ -121,10 +123,16 @@ def run_mujoco( assert isinstance(model_info["robot_effort"], list) assert isinstance(model_info["robot_stiffness"], list) assert isinstance(model_info["robot_damping"], list) + assert isinstance(model_info["joint_names"], list) + + + tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"] + kps = np.array(list(model_info["robot_stiffness"])) + kds = np.array(list(model_info["robot_damping"])) + + joint_names = model_info["joint_names"] - tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"] - kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) - kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + kinfer_serializer = NumpyMultiSerializer(policy.output_schema) try: data.qpos = model.keyframe("default").qpos @@ -147,23 +155,10 @@ def run_mujoco( target_q = np.zeros((model_info["num_actions"]), dtype=np.double) prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) - hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) + hist_obs = np.zeros((model_info["num_observations"]), dtype=np.float32) count_lowlevel = 0 - input_data = { - "x_vel.1": np.zeros(1).astype(np.float32), - "y_vel.1": np.zeros(1).astype(np.float32), - "rot.1": np.zeros(1).astype(np.float32), - "t.1": np.zeros(1).astype(np.float32), - "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "imu_ang_vel.1": np.zeros(3).astype(np.float32), - "imu_euler_xyz.1": np.zeros(3).astype(np.float32), - "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), - } - if log_h5: from sim.h5_logger import HDF5Logger @@ -211,26 +206,133 @@ def run_mujoco( cur_pos_obs = q - default cur_vel_obs = dq - input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) - input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) - input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) - input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) + # Form input + dof_pos = P.Value( + value_name="dof_pos", + joint_positions=P.JointPositionsValue( + values=[ + P.JointPositionValue( + joint_name=name, + value=q[index], + unit=P.JointPositionUnit.RADIANS, + ) + for name, index in zip(joint_names, range(len(q))) + ] + ) + ) + + dof_vel = P.Value( + value_name="dof_vel", + joint_velocities=P.JointVelocitiesValue( + values=[ + P.JointVelocityValue( + joint_name=name, + value=dq[index], + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, + ) + for name, index in zip(joint_names, range(len(dq))) + ] + ) + ) - input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) - input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) + vector_command = P.Value( + value_name="vector_command", + vector_command=P.VectorCommandValue( + values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd], + ) + ) + seconds = int(count_lowlevel * model_info["sim_dt"]) + nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9) + timestamp = P.Value( + value_name="timestamp", + timestamp=P.TimestampValue( + seconds=seconds, + nanos=nanoseconds, + ) + ) + + prev_actions_value = P.Value( + value_name="prev_actions", + joint_positions=P.JointPositionsValue( + values=[ + P.JointPositionValue( + joint_name=name, + value=prev_actions[index], + unit=P.JointPositionUnit.RADIANS, + ) + for name, index in zip(joint_names, range(len(prev_actions))) + ] + ) + ) + + imu_ang_vel = P.Value( + value_name="imu_ang_vel", + imu=P.ImuValue( + angular_velocity=P.ImuGyroscopeValue( + x=omega[0], + y=omega[1], + z=omega[2], + ) + ) + ) + + imu_euler_xyz = P.Value( + value_name="imu_euler_xyz", + imu=P.ImuValue( + linear_acceleration=P.ImuAccelerometerValue( + x=eu_ang[0], + y=eu_ang[1], + z=eu_ang[2], + ), + ) + ) - input_data["prev_actions.1"] = prev_actions.astype(np.float32) + state_tensor = P.Value( + value_name="hist_obs", + state_tensor=P.StateTensorValue( + data=hist_obs.tobytes(), + ) + ) + + policy_input = P.IO( + values=[ + vector_command, + timestamp, + dof_pos, + dof_vel, + prev_actions_value, + imu_ang_vel, + imu_euler_xyz, + state_tensor, + ] + ) + + # input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) + # input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) + # input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) + + # input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) + + # input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) + # input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) - input_data["imu_ang_vel.1"] = omega.astype(np.float32) - input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) + # input_data["prev_actions.1"] = prev_actions.astype(np.float32) - input_data["buffer.1"] = hist_obs.astype(np.float32) + # input_data["imu_ang_vel.1"] = omega.astype(np.float32) + # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) - policy_output = policy(input_data) - positions = policy_output["actions_scaled"] - curr_actions = policy_output["actions"] - hist_obs = policy_output["x.3"] + # input_data["buffer.1"] = hist_obs.astype(np.float32) + + # policy_output = policy(input_data) + + policy_output = policy(policy_input) + + outputs = kinfer_serializer.serialize_io(policy_output, as_dict=True) + + positions = outputs["actions"] + curr_actions = outputs["actions_raw"] + hist_obs = outputs["new_x"] target_q = positions @@ -305,18 +407,24 @@ def run_mujoco( x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0 policy = ONNXModel(args.load_model) - metadata = policy.get_metadata() - model_info = { - "num_actions": metadata["num_actions"], - "num_observations": metadata["num_observations"], - "robot_effort": metadata["robot_effort"], - "robot_stiffness": metadata["robot_stiffness"], - "robot_damping": metadata["robot_damping"], - "sim_dt": metadata["sim_dt"], - "sim_decimation": metadata["sim_decimation"], - "tau_factor": metadata["tau_factor"], - } + metadata = policy.attached_metadata + try: + model_info = { + "num_actions": metadata["num_actions"], + "num_observations": metadata["num_observations"], + "robot_effort": [metadata["robot_effort"][joint] for joint in metadata["joint_names"]], + "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in metadata["joint_names"]], + "robot_damping": [metadata["robot_damping"][joint] for joint in metadata["joint_names"]], + "sim_dt": metadata["sim_dt"], + "sim_decimation": metadata["sim_decimation"], + "tau_factor": metadata["tau_factor"], + "joint_names": metadata["joint_names"], + } + except Exception as e: + print(f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}") + raise e + run_mujoco( embodiment=args.embodiment, policy=policy, diff --git a/third_party/kinfer b/third_party/kinfer index 83cff064..e7b2dcd2 160000 --- a/third_party/kinfer +++ b/third_party/kinfer @@ -1 +1 @@ -Subproject commit 83cff06425acc31746a3f23ae219a353c6d49306 +Subproject commit e7b2dcd2b7728d853f778e23585b2f3eb26f6658 From 30791e6d5558f8960e7726d0d1674ed264500693 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 14:44:58 -0800 Subject: [PATCH 26/43] joint name ordering --- sim/envs/humanoids/gpr_config.py | 10 +++++----- sim/model_export.py | 2 +- sim/resources/gpr/joints.py | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index f6f5b645..0bb526e4 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -45,21 +45,21 @@ class env(LeggedRobotCfg.env): P.ValueSchema( value_name="dof_pos", joint_positions=P.JointPositionsSchema( - joint_names=Robot.all_joints(), + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS, ), ), P.ValueSchema( value_name="dof_vel", joint_velocities=P.JointVelocitiesSchema( - joint_names=Robot.all_joints(), + joint_names=Robot.joint_names(), unit=P.JointVelocityUnit.RADIANS_PER_SECOND, ), ), P.ValueSchema( value_name="prev_actions", joint_positions=P.JointPositionsSchema( - joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS ), ), # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data @@ -96,13 +96,13 @@ class env(LeggedRobotCfg.env): P.ValueSchema( value_name="actions", joint_positions=P.JointPositionsSchema( - joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS ), ), P.ValueSchema( value_name="actions_raw", joint_positions=P.JointPositionsSchema( - joint_names=Robot.all_joints(), unit=P.JointPositionUnit.RADIANS + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS ), ), P.ValueSchema( diff --git a/sim/model_export.py b/sim/model_export.py index b57003cf..c35d7422 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -265,7 +265,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T "frame_stack": cfg.frame_stack, "clip_observations": cfg.clip_observations, "clip_actions": cfg.clip_actions, - "joint_names": robot.all_joints(), + "joint_names": robot.joint_names(), } return ( diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index 971f2733..31e1f12e 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -137,6 +137,12 @@ def default_standing(cls) -> Dict[str, float]: Robot.legs.right.ankle_pitch: 0.195, } + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. + @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + @classmethod def default_limits(cls) -> Dict[str, Dict[str, float]]: return { From 5283db11d29aae9447e8cffcd33594f78825e099 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 14:47:42 -0800 Subject: [PATCH 27/43] stands --- sim/sim2sim.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 946b9f2e..f0356f4d 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -127,8 +127,8 @@ def run_mujoco( tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"] - kps = np.array(list(model_info["robot_stiffness"])) - kds = np.array(list(model_info["robot_damping"])) + kps = np.array(model_info["robot_stiffness"]) + kds = np.array(model_info["robot_damping"]) joint_names = model_info["joint_names"] @@ -206,6 +206,7 @@ def run_mujoco( cur_pos_obs = q - default cur_vel_obs = dq + mm = q # Form input dof_pos = P.Value( From 01fdc369a1dda51917ca639349cf5f3c3c687ebe Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 15:00:31 -0800 Subject: [PATCH 28/43] clean --- sim/sim2sim.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index f0356f4d..b938d9fb 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -124,6 +124,9 @@ def run_mujoco( assert isinstance(model_info["robot_stiffness"], list) assert isinstance(model_info["robot_damping"], list) assert isinstance(model_info["joint_names"], list) + assert isinstance(model_info["sim_dt"], float) + assert isinstance(model_info["cycle_time"], float) + assert isinstance(model_info["sim_decimation"], int) tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"] @@ -197,7 +200,7 @@ def run_mujoco( # Calculate speed and accumulate for average speed calculation speed = np.linalg.norm(v[:2]) # Speed in the x-y plane - total_speed += speed + total_speed += float(speed) step_count += 1 # 1000hz -> 50hz @@ -309,24 +312,6 @@ def run_mujoco( ] ) - # input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) - # input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) - # input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) - - # input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) - - # input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) - # input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) - - # input_data["prev_actions.1"] = prev_actions.astype(np.float32) - - # input_data["imu_ang_vel.1"] = omega.astype(np.float32) - # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) - - # input_data["buffer.1"] = hist_obs.astype(np.float32) - - # policy_output = policy(input_data) - policy_output = policy(policy_input) outputs = kinfer_serializer.serialize_io(policy_output, as_dict=True) From f02bd84ae38279c4eb88a8423622d43086ca244a Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 15:01:09 -0800 Subject: [PATCH 29/43] lint --- sim/model_export.py | 4 ++-- sim/sim2sim.py | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sim/model_export.py b/sim/model_export.py index c35d7422..68203b9d 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -147,14 +147,14 @@ def forward( imu_euler_xyz: The euler angles of the IMU, with shape (3), in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used, can be all zeros. - state_tensor: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is + hist_obs: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is the return value of the previous forward pass. On the first pass, it should be all zeros. Returns: actions_scaled: The actions to take, with shape (num_actions), scaled by the action_scale. actions: The actions to take, with shape (num_actions). - x: The new buffer of observations, with shape (frame_stack * num_single_obs). + new_x: The new buffer of observations, with shape (frame_stack * num_single_obs). """ sin_pos = torch.sin(2 * torch.pi * timestamp / self.cycle_time) cos_pos = torch.cos(2 * torch.pi * timestamp / self.cycle_time) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index b938d9fb..b7688e28 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -17,9 +17,9 @@ import onnxruntime as ort import pygame import torch +from kinfer import proto as P from kinfer.inference.python import ONNXModel from kinfer.serialize.numpy import NumpyMultiSerializer -from kinfer import proto as P from scipy.spatial.transform import Rotation as R from tqdm import tqdm @@ -93,7 +93,6 @@ def pd_control( return kp * (target_q + default - q) - kd * dq - def run_mujoco( embodiment: str, policy: ONNXModel, @@ -127,7 +126,6 @@ def run_mujoco( assert isinstance(model_info["sim_dt"], float) assert isinstance(model_info["cycle_time"], float) assert isinstance(model_info["sim_decimation"], int) - tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"] kps = np.array(model_info["robot_stiffness"]) @@ -223,7 +221,7 @@ def run_mujoco( ) for name, index in zip(joint_names, range(len(q))) ] - ) + ), ) dof_vel = P.Value( @@ -237,14 +235,14 @@ def run_mujoco( ) for name, index in zip(joint_names, range(len(dq))) ] - ) + ), ) vector_command = P.Value( value_name="vector_command", vector_command=P.VectorCommandValue( values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd], - ) + ), ) seconds = int(count_lowlevel * model_info["sim_dt"]) nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9) @@ -253,7 +251,7 @@ def run_mujoco( timestamp=P.TimestampValue( seconds=seconds, nanos=nanoseconds, - ) + ), ) prev_actions_value = P.Value( @@ -267,7 +265,7 @@ def run_mujoco( ) for name, index in zip(joint_names, range(len(prev_actions))) ] - ) + ), ) imu_ang_vel = P.Value( @@ -278,7 +276,7 @@ def run_mujoco( y=omega[1], z=omega[2], ) - ) + ), ) imu_euler_xyz = P.Value( @@ -289,14 +287,14 @@ def run_mujoco( y=eu_ang[1], z=eu_ang[2], ), - ) + ), ) state_tensor = P.Value( value_name="hist_obs", state_tensor=P.StateTensorValue( data=hist_obs.tobytes(), - ) + ), ) policy_input = P.IO( @@ -408,9 +406,11 @@ def run_mujoco( "joint_names": metadata["joint_names"], } except Exception as e: - print(f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}") + print( + f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}" + ) raise e - + run_mujoco( embodiment=args.embodiment, policy=policy, From 87f0253fbdb5e1d02ea21f1e8aeb5a2bc272dac8 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 21:47:35 -0800 Subject: [PATCH 30/43] cycle time --- sim/model_export.py | 1 + sim/sim2sim.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sim/model_export.py b/sim/model_export.py index 68203b9d..e3ee0d4f 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -266,6 +266,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T "clip_observations": cfg.clip_observations, "clip_actions": cfg.clip_actions, "joint_names": robot.joint_names(), + "cycle_time": cfg.cycle_time, } return ( diff --git a/sim/sim2sim.py b/sim/sim2sim.py index b7688e28..3823d8c0 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -207,8 +207,6 @@ def run_mujoco( cur_pos_obs = q - default cur_vel_obs = dq - mm = q - # Form input dof_pos = P.Value( value_name="dof_pos", @@ -244,6 +242,7 @@ def run_mujoco( values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd], ), ) + seconds = int(count_lowlevel * model_info["sim_dt"]) nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9) timestamp = P.Value( @@ -404,6 +403,7 @@ def run_mujoco( "sim_decimation": metadata["sim_decimation"], "tau_factor": metadata["tau_factor"], "joint_names": metadata["joint_names"], + "cycle_time": metadata["cycle_time"], } except Exception as e: print( From 9cfde7f7072b28ec57ac6fd5d57dff9ab16ffb4c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 22:06:57 -0800 Subject: [PATCH 31/43] noise to default --- sim/envs/base/legged_robot.py | 11 +++++++++++ sim/envs/humanoids/gpr_config.py | 1 + 2 files changed, 12 insertions(+) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index cb3bdf60..fb89d649 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -81,6 +81,7 @@ def step(self, actions): def reset(self): """Reset all robots""" self.reset_idx(torch.arange(self.num_envs, device=self.device)) + self._resample_default_positions() obs, privileged_obs, _, _, _ = self.step( torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False) ) @@ -470,6 +471,16 @@ def update_command_curriculum(self, env_ids): self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum ) + + def _resample_default_positions(self): + self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + for i in range(self.num_dofs): + name = self.dof_names[i] + self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] + + if self.add_noise: + self.default_dof_pos[i] *= self.cfg.noise.noise_scales.default_pos + # ---------------------------------------- def _init_buffers(self): """Initialize torch tensors which will contain simulation states and processed quantities""" diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 0bb526e4..3cdbf730 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -167,6 +167,7 @@ class noise_scales: lin_vel = 0.05 quat = 0.03 height_measurements = 0.1 + default_pos = 0.05 class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] From bc56f7ec823254f28834d2b99c2242abefe77c70 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 23:34:00 -0800 Subject: [PATCH 32/43] change noise behavior --- sim/envs/base/legged_robot.py | 2 +- sim/envs/humanoids/gpr_config.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index fb89d649..ed4b3461 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -479,7 +479,7 @@ def _resample_default_positions(self): self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] if self.add_noise: - self.default_dof_pos[i] *= self.cfg.noise.noise_scales.default_pos + self.default_dof_pos[i] += torch.randn(1) * self.cfg.noise.noise_ranges.default_pos # ---------------------------------------- def _init_buffers(self): diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 3cdbf730..e9d6989e 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -167,7 +167,9 @@ class noise_scales: lin_vel = 0.05 quat = 0.03 height_measurements = 0.1 - default_pos = 0.05 + + class nosie_ranges: + default_pos = 0.05 # +- 0.05 rad class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] From 3574ef1bbfa73d20e47e5b84d379d1c7be9e8677 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 23:34:35 -0800 Subject: [PATCH 33/43] less noise --- sim/envs/humanoids/gpr_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index e9d6989e..f8c0708b 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -169,7 +169,7 @@ class noise_scales: height_measurements = 0.1 class nosie_ranges: - default_pos = 0.05 # +- 0.05 rad + default_pos = 0.03 # +- 0.05 rad class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] From 63ed69427f4cb7fec870c04463ef0ce052d1bcd5 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 1 Jan 2025 23:37:36 -0800 Subject: [PATCH 34/43] fix noise --- sim/envs/base/legged_robot.py | 2 +- sim/envs/humanoids/gpr_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index ed4b3461..1cc6e5af 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -479,7 +479,7 @@ def _resample_default_positions(self): self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] if self.add_noise: - self.default_dof_pos[i] += torch.randn(1) * self.cfg.noise.noise_ranges.default_pos + self.default_dof_pos[i] += torch.randn(1).item() * self.cfg.noise.noise_ranges.default_pos # ---------------------------------------- def _init_buffers(self): diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index f8c0708b..9bd940d2 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -168,7 +168,7 @@ class noise_scales: quat = 0.03 height_measurements = 0.1 - class nosie_ranges: + class noise_ranges: default_pos = 0.03 # +- 0.05 rad class init_state(LeggedRobotCfg.init_state): From d537864b4df19a2cfc762b02116ad09e52e8048e Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 01:03:34 -0800 Subject: [PATCH 35/43] lint --- sim/envs/base/legged_robot.py | 1 - sim/envs/humanoids/gpr_config.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index 1cc6e5af..1b17e60b 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -471,7 +471,6 @@ def update_command_curriculum(self, env_ids): self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum ) - def _resample_default_positions(self): self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) for i in range(self.num_dofs): diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 9bd940d2..8099328d 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -169,7 +169,7 @@ class noise_scales: height_measurements = 0.1 class noise_ranges: - default_pos = 0.03 # +- 0.05 rad + default_pos = 0.03 # +- 0.05 rad class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] From 20add09d3eedd02aefbce6a8feb99f9e761d51f2 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 14:54:21 -0800 Subject: [PATCH 36/43] get name from schema --- sim/sim2sim.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 3823d8c0..c737521b 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -392,6 +392,12 @@ def run_mujoco( policy = ONNXModel(args.load_model) metadata = policy.attached_metadata + joint_names = [] + for value_schema in policy.input_schema.values + policy.output_schema.values: + if value_schema.HasField("joint_positions"): + joint_names = list(value_schema.joint_positions.joint_names) + break + try: model_info = { "num_actions": metadata["num_actions"], @@ -402,7 +408,7 @@ def run_mujoco( "sim_dt": metadata["sim_dt"], "sim_decimation": metadata["sim_decimation"], "tau_factor": metadata["tau_factor"], - "joint_names": metadata["joint_names"], + "joint_names": joint_names, "cycle_time": metadata["cycle_time"], } except Exception as e: From 07b9a9dde9835354dedef98a9af615109baed109 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 14:57:00 -0800 Subject: [PATCH 37/43] fix --- sim/envs/humanoids/gpr_config.py | 4 ++-- sim/sim2sim.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 8099328d..506591b6 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -6,6 +6,8 @@ LeggedRobotCfgPPO, ) from sim.resources.gpr.joints import Robot +from kinfer import proto as P + NUM_JOINTS = len(Robot.all_joints()) @@ -26,8 +28,6 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False - from kinfer import proto as P - input_schema = P.IOSchema( values=[ P.ValueSchema( diff --git a/sim/sim2sim.py b/sim/sim2sim.py index c737521b..a32515e5 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -393,7 +393,7 @@ def run_mujoco( metadata = policy.attached_metadata joint_names = [] - for value_schema in policy.input_schema.values + policy.output_schema.values: + for value_schema in policy.input_schema.values: if value_schema.HasField("joint_positions"): joint_names = list(value_schema.joint_positions.joint_names) break From d37961d14569bf5d893fcbf9bddb6a88f41d2638 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 14:58:01 -0800 Subject: [PATCH 38/43] lint --- sim/envs/humanoids/gpr_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 506591b6..56ceaebc 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -1,13 +1,13 @@ """Defines the environment configuration for the Getting up task""" +from kinfer import proto as P + from sim.env import robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: ignore LeggedRobotCfg, LeggedRobotCfgPPO, ) from sim.resources.gpr.joints import Robot -from kinfer import proto as P - NUM_JOINTS = len(Robot.all_joints()) From 34783c0ecc036beae52dc659c4cb57c7c30923d1 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 14:59:30 -0800 Subject: [PATCH 39/43] more info from schema --- sim/sim2sim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index a32515e5..8b846ce9 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -400,11 +400,11 @@ def run_mujoco( try: model_info = { - "num_actions": metadata["num_actions"], + "num_actions": len(joint_names), "num_observations": metadata["num_observations"], - "robot_effort": [metadata["robot_effort"][joint] for joint in metadata["joint_names"]], - "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in metadata["joint_names"]], - "robot_damping": [metadata["robot_damping"][joint] for joint in metadata["joint_names"]], + "robot_effort": [metadata["robot_effort"][joint] for joint in joint_names], + "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in joint_names], + "robot_damping": [metadata["robot_damping"][joint] for joint in joint_names], "sim_dt": metadata["sim_dt"], "sim_decimation": metadata["sim_decimation"], "tau_factor": metadata["tau_factor"], From d3ca3d0d4278860d7c8a6111607f8d6c7cbbc05c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 15:24:57 -0800 Subject: [PATCH 40/43] imu --- sim/resources/gpr/robot_fixed.urdf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf index 904e5ec3..20f2fff7 100644 --- a/sim/resources/gpr/robot_fixed.urdf +++ b/sim/resources/gpr/robot_fixed.urdf @@ -6,6 +6,22 @@ + + + + + + + + + + + + + + + + @@ -613,4 +629,4 @@ - \ No newline at end of file + From 570541873dea7755db7bfd7cd45f211590541b3a Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 15:30:59 -0800 Subject: [PATCH 41/43] imu --- sim/resources/gpr/robot_fixed.urdf | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf index 20f2fff7..b16a8ba5 100644 --- a/sim/resources/gpr/robot_fixed.urdf +++ b/sim/resources/gpr/robot_fixed.urdf @@ -7,20 +7,26 @@ - - + - + - - + + - - - - + + + + From cfb251331e8431e36435b6d93c0cac412f1fe93d Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 15:39:05 -0800 Subject: [PATCH 42/43] add imu missing warning --- sim/envs/base/legged_robot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index 1b17e60b..2953ee52 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -801,6 +801,7 @@ def _create_envs(self): ) if self.imu_indices == -1: self.imu_indices = None + print(f"Warning: IMU {self.cfg.asset.imu_name} not found in the asset. Defaulting to base link.") self.penalised_contact_indices = torch.zeros( len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False From d37834d4e300856f544d5de4113d32f1e8d5d8cc Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Thu, 2 Jan 2025 17:00:55 -0800 Subject: [PATCH 43/43] remove default randomization --- sim/envs/base/legged_robot.py | 2 +- sim/envs/humanoids/gpr_config.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index 2953ee52..47edade7 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -81,7 +81,7 @@ def step(self, actions): def reset(self): """Reset all robots""" self.reset_idx(torch.arange(self.num_envs, device=self.device)) - self._resample_default_positions() + # self._resample_default_positions() obs, privileged_obs, _, _, _ = self.step( torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False) ) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 56ceaebc..128183f9 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -168,8 +168,9 @@ class noise_scales: quat = 0.03 height_measurements = 0.1 - class noise_ranges: - default_pos = 0.03 # +- 0.05 rad + # Currently unused + # class noise_ranges: + # default_pos = 0.03 # +- 0.05 rad class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height]