diff --git a/.gitignore b/.gitignore index d09d6354..194270e2 100755 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ wandb/ runs/ isaacgym/ *.h5 + +# dev +ref/ diff --git a/.gitmodules b/.gitmodules index 1d08b1ee..4e07ec9f 100755 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "third_party/isaacgym"] path = third_party/isaacgym - url = ../../kscalelabs/isaacgym.git \ No newline at end of file + url = ../../kscalelabs/isaacgym.git +[submodule "third_party/kinfer"] + path = third_party/kinfer + url = https://github.com/kscalelabs/kinfer.git diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index cb3bdf60..47edade7 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -81,6 +81,7 @@ def step(self, actions): def reset(self): """Reset all robots""" self.reset_idx(torch.arange(self.num_envs, device=self.device)) + # self._resample_default_positions() obs, privileged_obs, _, _, _ = self.step( torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False) ) @@ -470,6 +471,15 @@ def update_command_curriculum(self, env_ids): self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum ) + def _resample_default_positions(self): + self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + for i in range(self.num_dofs): + name = self.dof_names[i] + self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] + + if self.add_noise: + self.default_dof_pos[i] += torch.randn(1).item() * self.cfg.noise.noise_ranges.default_pos + # ---------------------------------------- def _init_buffers(self): """Initialize torch tensors which will contain simulation states and processed quantities""" @@ -791,6 +801,7 @@ def _create_envs(self): ) if self.imu_indices == -1: self.imu_indices = None + print(f"Warning: IMU {self.cfg.asset.imu_name} not found in the asset. Defaulting to base link.") self.penalised_contact_indices = torch.zeros( len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index e87d2252..71428b71 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -29,6 +29,8 @@ # # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. +import os +import sys from enum import Enum from sim.envs.base.base_config import BaseConfig diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index c59372d3..128183f9 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -1,5 +1,7 @@ """Defines the environment configuration for the Getting up task""" +from kinfer import proto as P + from sim.env import robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: ignore LeggedRobotCfg, @@ -26,6 +28,93 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False + input_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="vector_command", + vector_command=P.VectorCommandSchema( + dimensions=3, # x_vel, y_vel, rot + ), + ), + P.ValueSchema( + value_name="timestamp", + timestamp=P.TimestampSchema( + start_seconds=0, + ), + ), + P.ValueSchema( + value_name="dof_pos", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.joint_names(), + unit=P.JointPositionUnit.RADIANS, + ), + ), + P.ValueSchema( + value_name="dof_vel", + joint_velocities=P.JointVelocitiesSchema( + joint_names=Robot.joint_names(), + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, + ), + ), + P.ValueSchema( + value_name="prev_actions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS + ), + ), + # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data + P.ValueSchema( + value_name="imu_ang_vel", + imu=P.ImuSchema( + use_accelerometer=False, + use_gyroscope=True, + use_magnetometer=False, + ), + ), + P.ValueSchema( + value_name="imu_euler_xyz", + imu=P.ImuSchema( + use_accelerometer=True, + use_gyroscope=False, + use_magnetometer=False, + ), + ), + P.ValueSchema( + value_name="hist_obs", + state_tensor=P.StateTensorSchema( + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + shape=[frame_stack * (11 + NUM_JOINTS * 3)], + dtype=P.DType.FP32, + ), + ), + ] + ) + + output_schema = P.IOSchema( + values=[ + P.ValueSchema( + value_name="actions", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS + ), + ), + P.ValueSchema( + value_name="actions_raw", + joint_positions=P.JointPositionsSchema( + joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS + ), + ), + P.ValueSchema( + value_name="new_x", + state_tensor=P.StateTensorSchema( + shape=[frame_stack * (11 + NUM_JOINTS * 3)], + dtype=P.DType.FP32, + ), + ), + ] + ) + class safety(LeggedRobotCfg.safety): # safety factors pos_limit = 1.0 @@ -79,6 +168,10 @@ class noise_scales: quat = 0.03 height_measurements = 0.1 + # Currently unused + # class noise_ranges: + # default_pos = 0.03 # +- 0.05 rad + class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] rot = Robot.rotation diff --git a/sim/model_export.py b/sim/model_export.py index 3777124a..e3ee0d4f 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass, fields from io import BytesIO -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import onnx import onnxruntime as ort @@ -125,24 +125,20 @@ def get_init_buffer(self) -> Tensor: def forward( self, - x_vel: Tensor, # x-coordinate of the target velocity - y_vel: Tensor, # y-coordinate of the target velocity - rot: Tensor, # target angular velocity - t: Tensor, # current policy time (sec) + vector_command: Tensor, # (x_vel, y_vel, rot) + timestamp: Tensor, # current policy time (sec) dof_pos: Tensor, # current angular position of the DoFs relative to default dof_vel: Tensor, # current angular velocity of the DoFs prev_actions: Tensor, # previous actions taken by the model imu_ang_vel: Tensor, # angular velocity of the IMU imu_euler_xyz: Tensor, # euler angles of the IMU - buffer: Tensor, # buffer of previous observations - ) -> Tuple[Tensor, Tensor, Tensor]: + hist_obs: Tensor, # buffer of previous observations + ) -> Dict[str, Tensor]: """Runs the actor model forward pass. Args: - x_vel: The x-coordinate of the target velocity, with shape (1). - y_vel: The y-coordinate of the target velocity, with shape (1). - rot: The target angular velocity, with shape (1). - t: The current policy time step, with shape (1). + vector_command: The target velocity vector, with shape (3). It consistes of x_vel, y_vel, and rot. + timestamp: The current policy time step, with shape (1). dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions). dof_vel: The current angular velocity of the DoFs, with shape (num_actions). prev_actions: The previous actions taken by the model, with shape (num_actions). @@ -151,17 +147,19 @@ def forward( imu_euler_xyz: The euler angles of the IMU, with shape (3), in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used, can be all zeros. - buffer: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is + hist_obs: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is the return value of the previous forward pass. On the first pass, it should be all zeros. Returns: actions_scaled: The actions to take, with shape (num_actions), scaled by the action_scale. actions: The actions to take, with shape (num_actions). - x: The new buffer of observations, with shape (frame_stack * num_single_obs). + new_x: The new buffer of observations, with shape (frame_stack * num_single_obs). """ - sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time) - cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time) + sin_pos = torch.sin(2 * torch.pi * timestamp / self.cycle_time) + cos_pos = torch.cos(2 * torch.pi * timestamp / self.cycle_time) + + x_vel, y_vel, rot = vector_command.split(1) # Construct command input command_input = torch.cat( @@ -186,8 +184,8 @@ def forward( q, dq, prev_actions, - imu_ang_vel * self.ang_vel_scale, - imu_euler_xyz * self.quat_scale, + imu_ang_vel.squeeze(0) * self.ang_vel_scale, + imu_euler_xyz.squeeze(0) * self.quat_scale, ), dim=0, ) @@ -196,7 +194,7 @@ def forward( new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations) # Add the new frame to the buffer - x = torch.cat((buffer, new_x), dim=0) + x = torch.cat((hist_obs, new_x), dim=0) # Pop the oldest frame x = x[self.num_single_obs :] @@ -206,7 +204,7 @@ def forward( actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale - return actions_scaled, actions, x + return {"actions": actions_scaled, "actions_raw": actions, "new_x": x} def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: @@ -239,24 +237,41 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer) # Add sim2sim metadata - robot_effort = list(a_model.robot.effort().values()) - robot_stiffness = list(a_model.robot.stiffness().values()) - robot_damping = list(a_model.robot.damping().values()) + robot = a_model.robot + robot_effort = robot.effort_mapping() + robot_stiffness = robot.stiffness_mapping() + robot_damping = robot.damping_mapping() num_actions = a_model.num_actions num_observations = a_model.num_observations - default_standing = list(a_model.robot.default_standing().values()) + default_standing = robot.default_standing() + + metadata = { + "num_actions": num_actions, + "num_observations": num_observations, + "robot_effort": robot_effort, + "robot_stiffness": robot_stiffness, + "robot_damping": robot_damping, + "default_standing": default_standing, + "sim_dt": cfg.sim_dt, + "sim_decimation": cfg.sim_decimation, + "tau_factor": cfg.tau_factor, + "action_scale": cfg.action_scale, + "lin_vel_scale": cfg.lin_vel_scale, + "ang_vel_scale": cfg.ang_vel_scale, + "quat_scale": cfg.quat_scale, + "dof_pos_scale": cfg.dof_pos_scale, + "dof_vel_scale": cfg.dof_vel_scale, + "frame_stack": cfg.frame_stack, + "clip_observations": cfg.clip_observations, + "clip_actions": cfg.clip_actions, + "joint_names": robot.joint_names(), + "cycle_time": cfg.cycle_time, + } return ( a_model, - { - "robot_effort": robot_effort, - "robot_stiffness": robot_stiffness, - "robot_damping": robot_damping, - "num_actions": num_actions, - "num_observations": num_observations, - "default_standing": default_standing, - }, + metadata, input_tensors, ) diff --git a/sim/play.py b/sim/play.py index 04465dd9..1726cf76 100755 --- a/sim/play.py +++ b/sim/play.py @@ -9,16 +9,16 @@ import logging import math import os -import time -import uuid from datetime import datetime from typing import Any, Union import cv2 import h5py import numpy as np +import onnx from isaacgym import gymapi -from kinfer.export.pytorch import export_to_onnx +from kinfer import proto as P +from kinfer.export.pytorch import export_model from tqdm import tqdm from sim.env import run_dir # noqa: E402 @@ -28,6 +28,7 @@ from sim.model_export import ActorCfg, get_actor_policy from sim.utils.helpers import get_args # noqa: E402 from sim.utils.logger import Logger # noqa: E402 +from sim.utils.resources import load_embodiment import torch # special case with isort: skip comment @@ -37,9 +38,14 @@ def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: os.makedirs(path, exist_ok=True) path = os.path.join(path, "policy_1.pt") + model = get_actor_jit(actor_critic) + model.save(path) + + +def get_actor_jit(actor_critic: Any) -> Any: model = copy.deepcopy(actor_critic.actor).to("cpu") traced_script_module = torch.jit.script(model) - traced_script_module.save(path) + return traced_script_module def play(args: argparse.Namespace) -> None: @@ -84,12 +90,15 @@ def play(args: argparse.Namespace) -> None: export_policy_as_jit(ppo_runner.alg.actor_critic, path) print("Exported policy as jit script to: ", path) - # export policy as a onnx module (used to run it on web) + if env_cfg.env.input_schema is None or env_cfg.env.output_schema is None: + raise ValueError("Input or output schema is None") + + # Create the full model schema + model_schema = P.ModelSchema(input_schema=env_cfg.env.input_schema, output_schema=env_cfg.env.output_schema) + if args.export_onnx: - path = ppo_runner.load_path - embodiment = ppo_runner.cfg["experiment_name"].lower() - policy_cfg = ActorCfg( - embodiment=embodiment, + actor_cfg = ActorCfg( + embodiment=ppo_runner.cfg["experiment_name"].lower(), cycle_time=env_cfg.rewards.cycle_time, sim_dt=env_cfg.sim.dt, sim_decimation=env_cfg.control.decimation, @@ -104,14 +113,13 @@ def play(args: argparse.Namespace) -> None: clip_observations=env_cfg.normalization.clip_observations, clip_actions=env_cfg.normalization.clip_actions, ) - actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg) - - # Merge policy_cfg and sim2sim_info into a single config object - export_config = {**vars(policy_cfg), **sim2sim_info} - - export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_policy.onnx") - print("Exported policy as kinfer-compatible onnx to: ", path) - + jit_policy, metadata, _ = get_actor_policy(ppo_runner.load_path, actor_cfg) + kinfer_policy = export_model( + model=jit_policy, + schema=model_schema, + metadata=metadata, + ) + onnx.save(kinfer_policy, "policy.kinfer") # Prepare for logging env_logger = Logger(env.dt) robot_index = 0 diff --git a/sim/requirements.txt b/sim/requirements.txt index fbbdc4e6..ffac4a26 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -10,6 +10,7 @@ numpy==1.21.6 wandb tensorboard==2.14.0 onnxscript +onnx mujoco==2.3.6 kinfer==0.0.5 -opencv-python \ No newline at end of file +opencv-python diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index 3c4dd5d5..31e1f12e 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -137,6 +137,12 @@ def default_standing(cls) -> Dict[str, float]: Robot.legs.right.ankle_pitch: 0.195, } + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. + @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + @classmethod def default_limits(cls) -> Dict[str, Dict[str, float]]: return { @@ -155,6 +161,15 @@ def stiffness(cls) -> Dict[str, float]: "ankle_y": 40, } + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + for side in ["L", "R"]: + for joint, value in stiffness.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # d_gains @classmethod def damping(cls) -> Dict[str, float]: @@ -166,6 +181,15 @@ def damping(cls) -> Dict[str, float]: "ankle_y": 5, } + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for side in ["L", "R"]: + for joint, value in damping.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # effort_limits @classmethod def effort(cls) -> Dict[str, float]: @@ -177,6 +201,15 @@ def effort(cls) -> Dict[str, float]: "ankle_y": 17, } + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for side in ["L", "R"]: + for joint, value in effort.items(): + mapping[f"{side}_{joint}"] = value + return mapping + # vel_limits @classmethod def velocity(cls) -> Dict[str, float]: diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf index 904e5ec3..b16a8ba5 100644 --- a/sim/resources/gpr/robot_fixed.urdf +++ b/sim/resources/gpr/robot_fixed.urdf @@ -6,6 +6,28 @@ + + + + + + + + + + + + + + + @@ -613,4 +635,4 @@ - \ No newline at end of file + diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 82ce1137..8b846ce9 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -17,8 +17,9 @@ import onnxruntime as ort import pygame import torch -from kinfer.export.pytorch import export_to_onnx +from kinfer import proto as P from kinfer.inference.python import ONNXModel +from kinfer.serialize.numpy import NumpyMultiSerializer from scipy.spatial.transform import Rotation as R from tqdm import tqdm @@ -94,7 +95,7 @@ def pd_control( def run_mujoco( embodiment: str, - policy: ort.InferenceSession, + policy: ONNXModel, model_info: Dict[str, Union[float, List[float], str]], keyboard_use: bool = False, log_h5: bool = False, @@ -121,10 +122,18 @@ def run_mujoco( assert isinstance(model_info["robot_effort"], list) assert isinstance(model_info["robot_stiffness"], list) assert isinstance(model_info["robot_damping"], list) + assert isinstance(model_info["joint_names"], list) + assert isinstance(model_info["sim_dt"], float) + assert isinstance(model_info["cycle_time"], float) + assert isinstance(model_info["sim_decimation"], int) - tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"] - kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) - kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + tau_limit = np.array(list(model_info["robot_effort"])) * model_info["tau_factor"] + kps = np.array(model_info["robot_stiffness"]) + kds = np.array(model_info["robot_damping"]) + + joint_names = model_info["joint_names"] + + kinfer_serializer = NumpyMultiSerializer(policy.output_schema) try: data.qpos = model.keyframe("default").qpos @@ -147,23 +156,10 @@ def run_mujoco( target_q = np.zeros((model_info["num_actions"]), dtype=np.double) prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) - hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) + hist_obs = np.zeros((model_info["num_observations"]), dtype=np.float32) count_lowlevel = 0 - input_data = { - "x_vel.1": np.zeros(1).astype(np.float32), - "y_vel.1": np.zeros(1).astype(np.float32), - "rot.1": np.zeros(1).astype(np.float32), - "t.1": np.zeros(1).astype(np.float32), - "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "imu_ang_vel.1": np.zeros(3).astype(np.float32), - "imu_euler_xyz.1": np.zeros(3).astype(np.float32), - "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), - } - if log_h5: from sim.h5_logger import HDF5Logger @@ -202,7 +198,7 @@ def run_mujoco( # Calculate speed and accumulate for average speed calculation speed = np.linalg.norm(v[:2]) # Speed in the x-y plane - total_speed += speed + total_speed += float(speed) step_count += 1 # 1000hz -> 50hz @@ -211,26 +207,115 @@ def run_mujoco( cur_pos_obs = q - default cur_vel_obs = dq - input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) - input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) - input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) - - input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) - - input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) - input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) - - input_data["prev_actions.1"] = prev_actions.astype(np.float32) - - input_data["imu_ang_vel.1"] = omega.astype(np.float32) - input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) - - input_data["buffer.1"] = hist_obs.astype(np.float32) - - policy_output = policy(input_data) - positions = policy_output["actions_scaled"] - curr_actions = policy_output["actions"] - hist_obs = policy_output["x.3"] + # Form input + dof_pos = P.Value( + value_name="dof_pos", + joint_positions=P.JointPositionsValue( + values=[ + P.JointPositionValue( + joint_name=name, + value=q[index], + unit=P.JointPositionUnit.RADIANS, + ) + for name, index in zip(joint_names, range(len(q))) + ] + ), + ) + + dof_vel = P.Value( + value_name="dof_vel", + joint_velocities=P.JointVelocitiesValue( + values=[ + P.JointVelocityValue( + joint_name=name, + value=dq[index], + unit=P.JointVelocityUnit.RADIANS_PER_SECOND, + ) + for name, index in zip(joint_names, range(len(dq))) + ] + ), + ) + + vector_command = P.Value( + value_name="vector_command", + vector_command=P.VectorCommandValue( + values=[x_vel_cmd, y_vel_cmd, yaw_vel_cmd], + ), + ) + + seconds = int(count_lowlevel * model_info["sim_dt"]) + nanoseconds = int((count_lowlevel * model_info["sim_dt"] - seconds) * 1e9) + timestamp = P.Value( + value_name="timestamp", + timestamp=P.TimestampValue( + seconds=seconds, + nanos=nanoseconds, + ), + ) + + prev_actions_value = P.Value( + value_name="prev_actions", + joint_positions=P.JointPositionsValue( + values=[ + P.JointPositionValue( + joint_name=name, + value=prev_actions[index], + unit=P.JointPositionUnit.RADIANS, + ) + for name, index in zip(joint_names, range(len(prev_actions))) + ] + ), + ) + + imu_ang_vel = P.Value( + value_name="imu_ang_vel", + imu=P.ImuValue( + angular_velocity=P.ImuGyroscopeValue( + x=omega[0], + y=omega[1], + z=omega[2], + ) + ), + ) + + imu_euler_xyz = P.Value( + value_name="imu_euler_xyz", + imu=P.ImuValue( + linear_acceleration=P.ImuAccelerometerValue( + x=eu_ang[0], + y=eu_ang[1], + z=eu_ang[2], + ), + ), + ) + + state_tensor = P.Value( + value_name="hist_obs", + state_tensor=P.StateTensorValue( + data=hist_obs.tobytes(), + ), + ) + + policy_input = P.IO( + values=[ + vector_command, + timestamp, + dof_pos, + dof_vel, + prev_actions_value, + imu_ang_vel, + imu_euler_xyz, + state_tensor, + ] + ) + + policy_output = policy(policy_input) + + outputs = kinfer_serializer.serialize_io(policy_output, as_dict=True) + + positions = outputs["actions"] + curr_actions = outputs["actions_raw"] + hist_obs = outputs["new_x"] target_q = positions @@ -305,17 +390,32 @@ def run_mujoco( x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0 policy = ONNXModel(args.load_model) - metadata = policy.get_metadata() - model_info = { - "num_actions": metadata["num_actions"], - "num_observations": metadata["num_observations"], - "robot_effort": metadata["robot_effort"], - "robot_stiffness": metadata["robot_stiffness"], - "robot_damping": metadata["robot_damping"], - "sim_dt": metadata["sim_dt"], - "sim_decimation": metadata["sim_decimation"], - "tau_factor": metadata["tau_factor"], - } + metadata = policy.attached_metadata + + joint_names = [] + for value_schema in policy.input_schema.values: + if value_schema.HasField("joint_positions"): + joint_names = list(value_schema.joint_positions.joint_names) + break + + try: + model_info = { + "num_actions": len(joint_names), + "num_observations": metadata["num_observations"], + "robot_effort": [metadata["robot_effort"][joint] for joint in joint_names], + "robot_stiffness": [metadata["robot_stiffness"][joint] for joint in joint_names], + "robot_damping": [metadata["robot_damping"][joint] for joint in joint_names], + "sim_dt": metadata["sim_dt"], + "sim_decimation": metadata["sim_decimation"], + "tau_factor": metadata["tau_factor"], + "joint_names": joint_names, + "cycle_time": metadata["cycle_time"], + } + except Exception as e: + print( + f"Error finding required metadata 'num_actions', 'num_observations', 'robot_effort', 'robot_stiffness', 'robot_damping', 'sim_dt', 'sim_decimation', 'tau_factor' in metadata: {metadata}" + ) + raise e run_mujoco( embodiment=args.embodiment, diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py index 2724896f..e985cb72 100755 --- a/sim/utils/helpers.py +++ b/sim/utils/helpers.py @@ -47,7 +47,8 @@ def class_to_dict(obj) -> dict: return obj result = {} for key in dir(obj): - if key.startswith("_"): + # Skip private attributes and known problematic ones + if key.startswith("_") or key in ["P", "input_schema", "output_schema"]: continue element = [] val = getattr(obj, key) diff --git a/sim/utils/resources.py b/sim/utils/resources.py new file mode 100644 index 00000000..3d353f2c --- /dev/null +++ b/sim/utils/resources.py @@ -0,0 +1,10 @@ +import importlib +from typing import Any + + +def load_embodiment(embodiment: str) -> Any: # noqa: ANN401 + # Dynamically import embodiment + module_name = f"sim.resources.{embodiment}.joints" + module = importlib.import_module(module_name) + robot = getattr(module, "Robot") + return robot diff --git a/third_party/kinfer b/third_party/kinfer new file mode 160000 index 00000000..e7b2dcd2 --- /dev/null +++ b/third_party/kinfer @@ -0,0 +1 @@ +Subproject commit e7b2dcd2b7728d853f778e23585b2f3eb26f6658