Commit 45a790a

Merge branch 'dev' into more-ms3-docs
StoneT2000 committed Mar 6, 2024
2 parents 0b295b2 + 49d2b50 commit 45a790a
Showing 160 changed files with 2,228 additions and 10,052 deletions.
2 changes: 1 addition & 1 deletion README.md
````diff
@@ -31,7 +31,7 @@ From pip:
 
 ```bash
 # This is temporary in order to install a in-dev version of sapien 3
-pip install manualtest/sapien-3.0.0.dev20240221+fa245b5-cp39-cp39-manylinux2014_x86_64.whl
+pip install manualtest/sapien-3.0.0.dev20240305+5d84989-cp310-cp310-manylinux2014_x86_64.whl
```
 
 From github:
````
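Because the wheel bump above changes both the SAPIEN dev build and the Python ABI tag (cp39 to cp310), a quick post-install check can save debugging time. A minimal sketch, assuming SAPIEN exposes the standard `__version__` attribute (that attribute is an assumption, not shown in this diff):

```python
# Post-install sanity check (sketch). Assumes sapien exposes __version__,
# which is conventional for the package but not confirmed by this commit.
import sys

import sapien

print(sys.version_info[:2])  # should be (3, 10) for the cp310 wheel
print(sapien.__version__)    # expect a 3.0.0.dev2024xxxx build string
```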
2 changes: 1 addition & 1 deletion examples/baselines/ppo/README.md
````diff
@@ -5,7 +5,7 @@ Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)
 State based
 ```bash
 python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
-python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
+python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=10000000 --eval_freq=10
 python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
 python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotStackCube-v1" --total_timesteps=100000000 --num-steps=100
 python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotPickCube-v1" --total_timesteps=100000000 --num-steps=100
````
63 changes: 43 additions & 20 deletions examples/baselines/ppo/ppo.py
```diff
@@ -57,6 +57,8 @@ class Args:
     """the number of parallel evaluation environments"""
     num_steps: int = 50
     """the number of steps to run in each environment per policy rollout"""
+    num_eval_steps: int = 50
+    """the number of steps to run in each evaluation environment during evaluation"""
     anneal_lr: bool = False
     """Toggle learning rate annealing for policy and value networks"""
     gamma: float = 0.8
```
```diff
@@ -183,9 +185,9 @@ def get_action_and_value(self, x, action=None):
 envs = FlattenActionSpaceWrapper(envs)
 eval_envs = FlattenActionSpaceWrapper(eval_envs)
 if args.capture_video:
-    eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, video_fps=30)
-envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=False, **env_kwargs)
-eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=True, **env_kwargs)
+    eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, max_steps_per_video=args.num_eval_steps, video_fps=30)
+envs = ManiSkillVectorEnv(envs, args.num_envs, **env_kwargs)
+eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=False, **env_kwargs)
 assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
 
 agent = Agent(envs).to(device)
```
```diff
@@ -225,18 +227,37 @@ def clip_action(action: torch.Tensor):
     if iteration % args.eval_freq == 1:
         # evaluate
         print("Evaluating")
-        eval_done = False
-        while not eval_done:
+        eval_envs.reset()
+        returns = []
+        eps_lens = []
+        successes = []
+        failures = []
+        for _ in range(args.num_eval_steps):
             with torch.no_grad():
                 eval_obs, _, eval_terminations, eval_truncations, eval_infos = eval_envs.step(agent.get_action(eval_obs, deterministic=True))
-                if eval_truncations.any():
-                    eval_done = True
-                    info = eval_infos["final_info"]
-                    episodic_return = info['episode']['r'].mean().cpu().numpy()
-                    print(f"eval_episodic_return={episodic_return}")
-                    writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
-                    writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
-                    writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
+                if "final_info" in eval_infos:
+                    mask = eval_infos["_final_info"]
+                    eps_lens.append(eval_infos["final_info"]["elapsed_steps"][mask].cpu().numpy())
+                    returns.append(eval_infos["final_info"]["episode"]["r"][mask].cpu().numpy())
+                    if "success" in eval_infos:
+                        successes.append(eval_infos["final_info"]["success"][mask].cpu().numpy())
+                    if "fail" in eval_infos:
+                        failures.append(eval_infos["final_info"]["fail"][mask].cpu().numpy())
+        returns = np.concatenate(returns)
+        eps_lens = np.concatenate(eps_lens)
+        print(f"Evaluated {args.num_eval_steps * args.num_envs} steps resulting in {len(eps_lens)} episodes")
+        if len(successes) > 0:
+            successes = np.concatenate(successes)
+            writer.add_scalar("charts/eval_success_rate", successes.mean(), global_step)
+            print(f"eval_success_rate={successes.mean()}")
+        if len(failures) > 0:
+            failures = np.concatenate(failures)
+            writer.add_scalar("charts/eval_fail_rate", failures.mean(), global_step)
+            print(f"eval_fail_rate={failures.mean()}")
+
+        print(f"eval_episodic_return={returns.mean()}")
+        writer.add_scalar("charts/eval_episodic_return", returns.mean(), global_step)
+        writer.add_scalar("charts/eval_episodic_length", eps_lens.mean(), global_step)
 
     if args.save_model and iteration % args.eval_freq == 1:
         model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
```
```diff
@@ -266,15 +287,17 @@ def clip_action(action: torch.Tensor):
         rewards[step] = reward.view(-1)
 
         if "final_info" in infos:
-            info = infos["final_info"]
-            done_mask = info["_final_info"]
-            episodic_return = info['episode']['r'][done_mask].mean().cpu().numpy()
-            # print(f"global_step={global_step}, episodic_return={episodic_return}")
-            writer.add_scalar("charts/success_rate", info["success"][done_mask].float().mean().cpu().numpy(), global_step)
+            final_info = infos["final_info"]
+            done_mask = final_info["_final_info"]
+            episodic_return = final_info['episode']['r'][done_mask].cpu().numpy().mean()
+            if "success" in final_info:
+                writer.add_scalar("charts/success_rate", final_info["success"][done_mask].cpu().numpy().mean(), global_step)
+            if "fail" in final_info:
+                writer.add_scalar("charts/fail_rate", final_info["fail"][done_mask].cpu().numpy().mean(), global_step)
             writer.add_scalar("charts/episodic_return", episodic_return, global_step)
-            writer.add_scalar("charts/episodic_length", info["elapsed_steps"][done_mask].float().mean().cpu().numpy(), global_step)
+            writer.add_scalar("charts/episodic_length", final_info["elapsed_steps"][done_mask].cpu().numpy().mean(), global_step)
 
-            final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(info["final_observation"][done_mask]).view(-1)
+            final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(final_info["final_observation"][done_mask]).view(-1)
 
     # bootstrap value according to termination and truncation
     with torch.no_grad():
```
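Both the new evaluation block and the training rollout above aggregate per-episode statistics only from sub-environments that just finished, using the `final_info` payload together with a `_final_info` validity mask. A minimal sketch of that aggregation pattern, factored into a helper; the helper name and the exact placement of the mask key are illustrative assumptions, not part of the ppo.py API:

```python
import numpy as np


def append_episode_stats(infos: dict, storage: dict) -> None:
    """Collect stats for sub-environments that finished on this step (sketch only).

    Assumes the convention used in the diff above: infos["final_info"] holds
    batched tensors for the whole vector env, and infos["_final_info"] is a
    boolean mask marking which entries correspond to episodes that actually ended.
    """
    if "final_info" not in infos:
        return
    final_info = infos["final_info"]
    mask = infos["_final_info"]
    storage.setdefault("return", []).append(final_info["episode"]["r"][mask].cpu().numpy())
    storage.setdefault("length", []).append(final_info["elapsed_steps"][mask].cpu().numpy())
    for key in ("success", "fail"):
        if key in final_info:
            storage.setdefault(key, []).append(final_info[key][mask].cpu().numpy())


# After a rollout, each list is concatenated and averaged, e.g.:
# returns = np.concatenate(storage["return"]); print(returns.mean())
```

The same mask also selects which environments need a bootstrapped value from their final observation in the training rollout shown above.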
6 changes: 0 additions & 6 deletions examples/benchmarking/benchmark_maniskill.py
@@ -1,19 +1,13 @@
```python
# py-spy record -f speedscope -r 1000 -o profile -- python manualtest/benchmark_gpu_sim.py
# python manualtest/benchmark_orbit_sim.py --task "Isaac-Lift-Cube-Franka-v0" --num_envs 512 --headless
import argparse
import time

import gymnasium as gym
import numpy as np
import sapien
import sapien.physx
import sapien.render
import torch
import tqdm

import mani_skill2.envs
from mani_skill2.utils.scene_builder.ai2thor.variants import ArchitecTHORSceneBuilder
from mani_skill2.utils.scene_builder.replicacad.scene_builder import ReplicaCADSceneBuilder
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
from profiling import Profiler
from mani_skill2.utils.visualization.misc import images_to_video, tile_images
```
1 change: 1 addition & 0 deletions mani_skill2/agents/__init__.py
```diff
@@ -0,0 +1 @@
+from .registration import REGISTERED_AGENTS
```
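With the registry re-exported from the package root, downstream code can enumerate available agents without importing the registration module directly. A minimal sketch, assuming `REGISTERED_AGENTS` is a dict-like mapping keyed by agent uid (the container type is an assumption, not stated in this diff):

```python
# Sketch of consuming the re-exported registry; the dict-like interface is assumed.
from mani_skill2.agents import REGISTERED_AGENTS

for uid in sorted(REGISTERED_AGENTS):
    print(uid)
```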
15 changes: 5 additions & 10 deletions mani_skill2/agents/base_agent.py
```diff
@@ -11,13 +11,8 @@
 
 from mani_skill2 import format_path
 from mani_skill2.sensors.base_sensor import BaseSensor, BaseSensorConfig
-from mani_skill2.utils.sapien_utils import (
-    apply_urdf_config,
-    check_urdf_config,
-    parse_urdf_config,
-)
-from mani_skill2.utils.structs.actor import Actor
-from mani_skill2.utils.structs.articulation import Articulation
+from mani_skill2.utils import sapien_utils
+from mani_skill2.utils.structs import Actor, Articulation
 
 from .controllers.base_controller import (
     BaseController,
```
```diff
@@ -98,11 +93,11 @@ def _load_articulation(self):
 
         urdf_path = format_path(str(self.urdf_path))
 
-        urdf_config = parse_urdf_config(self.urdf_config, self.scene)
-        check_urdf_config(urdf_config)
+        urdf_config = sapien_utils.parse_urdf_config(self.urdf_config, self.scene)
+        sapien_utils.check_urdf_config(urdf_config)
 
         # TODO(jigu): support loading multiple convex collision shapes
-        apply_urdf_config(loader, urdf_config)
+        sapien_utils.apply_urdf_config(loader, urdf_config)
         self.robot: Articulation = loader.load(urdf_path)
         assert self.robot is not None, f"Fail to load URDF from {urdf_path}"
 
```
20 changes: 6 additions & 14 deletions mani_skill2/agents/controllers/base_controller.py
```diff
@@ -16,8 +16,8 @@
     get_active_joint_indices,
     get_joints_by_names,
 )
+from mani_skill2.utils import sapien_utils
 from mani_skill2.utils.common import clip_and_scale_action, normalize_action_space
-from mani_skill2.utils.sapien_utils import to_tensor
 from mani_skill2.utils.structs.articulation import Articulation
 from mani_skill2.utils.structs.joint import Joint
 from mani_skill2.utils.structs.types import Array
```
```diff
@@ -37,8 +37,10 @@ class BaseController:
     """the action space. If the number of parallel environments is > 1, this action space is also batched"""
     single_action_space: spaces.Space
     """The unbatched version of the action space which is also typically already normalized by this class"""
+    """The batched version of the action space which is also typically already normalized by this class"""
     _original_single_action_space: spaces.Space
     """The unbatched, original action space without any additional processing like normalization"""
+    """The batched, original action space without any additional processing like normalization"""
 
     def __init__(
         self,
```
```diff
@@ -162,8 +164,8 @@ def _clip_and_scale_action_space(self):
             self._original_single_action_space.low,
             self._original_single_action_space.high,
         )
-        self.action_space_low = to_tensor(low)
-        self.action_space_high = to_tensor(high)
+        self.action_space_low = sapien_utils.to_tensor(low)
+        self.action_space_high = sapien_utils.to_tensor(high)
 
     def _clip_and_scale_action(self, action):
         return clip_and_scale_action(
```
```diff
@@ -205,7 +207,6 @@ def __init__(
         )
         self._initialize_action_space()
         self._initialize_joints()
-        self._assert_fully_actuated()
 
         self.action_space = self.single_action_space
         if self.scene.num_envs > 1:
```
```diff
@@ -229,15 +230,6 @@ def _initialize_joints(self):
             self.joints.extend(controller.joints)
             self.joint_indices.extend(controller.joint_indices)
 
-    def _assert_fully_actuated(self):
-        active_joints = self.articulation.get_active_joints()
-        if len(active_joints) != len(self.joints) or set(active_joints) != set(
-            self.joints
-        ):
-            print("active_joints:", [x.name for x in active_joints])
-            print("controlled_joints:", [x.name for x in self.joints])
-            raise AssertionError("{} is not fully actuated".format(self.articulation))
-
     def set_drive_property(self):
         for controller in self.controllers.values():
             controller.set_drive_property()
```
```diff
@@ -251,7 +243,7 @@ def reset(self):
 
     def set_action(self, action: Dict[str, np.ndarray]):
         for uid, controller in self.controllers.items():
-            controller.set_action(to_tensor(action[uid]))
+            controller.set_action(sapien_utils.to_tensor(action[uid]))
 
     def before_simulation_step(self):
         if physx.is_gpu_enabled():
```
32 changes: 23 additions & 9 deletions mani_skill2/agents/controllers/pd_ee_pose.py
```diff
@@ -12,12 +12,13 @@
 import torch
 from gymnasium import spaces
 
+from mani_skill2 import logger
+from mani_skill2.utils import sapien_utils
 from mani_skill2.utils.common import clip_and_scale_action
 from mani_skill2.utils.geometry.rotation_conversions import (
     euler_angles_to_matrix,
     matrix_to_quaternion,
 )
-from mani_skill2.utils.sapien_utils import get_obj_by_name, to_numpy, to_tensor
 from mani_skill2.utils.structs.pose import Pose, vectorize_pose
 from mani_skill2.utils.structs.types import Array
 
```
```diff
@@ -28,6 +29,7 @@
 # NOTE(jigu): not necessary to inherit, just for convenience
 class PDEEPosController(PDJointPosController):
     config: "PDEEPosControllerConfig"
+    _target_pose = None
 
     def _initialize_joints(self):
         self.initial_qpos = None
```
```diff
@@ -59,12 +61,15 @@ def suppress_stdout_stderr():
         self.qmask[self.joint_indices] = 1
 
         if self.config.ee_link:
-            self.ee_link = get_obj_by_name(
+            self.ee_link = sapien_utils.get_obj_by_name(
                 self.articulation.get_links(), self.config.ee_link
             )
         else:
             # The child link of last joint is assumed to be the end-effector.
             self.ee_link = self.joints[-1].get_child_link()
+            logger.warn(
+                "Configuration did not define a ee_link name, using the child link of the last joint"
+            )
         self.ee_link_idx = self.articulation.get_links().index(self.ee_link)
 
     def _initialize_action_space(self):
```
```diff
@@ -87,7 +92,13 @@ def ee_pose_at_base(self):
 
     def reset(self):
         super().reset()
-        self._target_pose = self.ee_pose_at_base
+        if self._target_pose is None:
+            self._target_pose = self.ee_pose_at_base
+        else:
+            # TODO (stao): this is a strange way to mask setting individual batched pose parts
+            self._target_pose.raw_pose[
+                self.scene._reset_mask
+            ] = self.ee_pose_at_base.raw_pose[self.scene._reset_mask]
 
     def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
         # Assume the target pose is defined in the base frame
```
```diff
@@ -105,12 +116,14 @@ def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
         result, success, error = self.pmodel.compute_inverse_kinematics(
             self.ee_link_idx,
             target_pose.sp,
-            initial_qpos=to_numpy(self.articulation.get_qpos()).squeeze(0),
+            initial_qpos=sapien_utils.to_numpy(
+                self.articulation.get_qpos()
+            ).squeeze(0),
             active_qmask=self.qmask,
             max_iterations=max_iterations,
         )
         if success:
-            return to_tensor([result[self.joint_indices]])
+            return sapien_utils.to_tensor([result[self.joint_indices]])
         else:
             return None
 
```
```diff
@@ -152,13 +165,14 @@ def set_action(self, action: Array):
 
     def get_state(self) -> dict:
         if self.config.use_target:
-            return {"target_pose": vectorize_pose(self._target_pose)}
+            return {"target_pose": self._target_pose.raw_pose}
         return {}
 
     def set_state(self, state: dict):
-        if self.config.use_target:
-            target_pose = state["target_pose"]
-            self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
+        # if self.config.use_target:
+        #     target_pose = state["target_pose"]
+        #     self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
+        raise NotImplementedError()
 
 
 @dataclass
```
27 changes: 21 additions & 6 deletions mani_skill2/agents/controllers/pd_joint_pos.py
```diff
@@ -5,14 +5,16 @@
 import torch
 from gymnasium import spaces
 
-from mani_skill2.utils.sapien_utils import to_tensor
+from mani_skill2.utils import sapien_utils
 from mani_skill2.utils.structs.types import Array
 
 from .base_controller import BaseController, ControllerConfig
 
 
 class PDJointPosController(BaseController):
     config: "PDJointPosControllerConfig"
+    _start_qpos = None
+    _target_qpos = None
 
     def _get_joint_limits(self):
         qlimits = self.articulation.get_qlimits()[0, self.joint_indices].cpu().numpy()
```
```diff
@@ -44,8 +46,19 @@ def set_drive_property(self):
     def reset(self):
         super().reset()
         self._step = 0  # counter of simulation steps after action is set
-        self._start_qpos = self.qpos
-        self._target_qpos = self.qpos
+        if self._start_qpos is None:
+            self._start_qpos = self.qpos.clone()
+        else:
+
+            self._start_qpos[self.scene._reset_mask] = self.qpos[
+                self.scene._reset_mask
+            ].clone()
+        if self._target_qpos is None:
+            self._target_qpos = self.qpos.clone()
+        else:
+            self._target_qpos[self.scene._reset_mask] = self.qpos[
+                self.scene._reset_mask
+            ].clone()
 
     def set_drive_targets(self, targets):
         self.articulation.set_joint_drive_targets(
```
```diff
@@ -54,7 +67,7 @@ def set_drive_targets(self, targets):
 
     def set_action(self, action: Array):
         action = self._preprocess_action(action)
-        action = to_tensor(action)
+        action = sapien_utils.to_tensor(action)
         self._step = 0
         self._start_qpos = self.qpos
         if self.config.use_delta:
```
```diff
@@ -63,8 +76,10 @@
             else:
                 self._target_qpos = self._start_qpos + action
         else:
-            # Compatible with mimic controllers
-            self._target_qpos = torch.broadcast_to(action, self._start_qpos.shape)
+            # Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function
+            self._target_qpos = torch.broadcast_to(
+                action, self._start_qpos.shape
+            ).clone()
         if self.config.interpolate:
             self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
         else:
```
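Both controller diffs above (pd_ee_pose.py and pd_joint_pos.py) stop overwriting cached targets unconditionally on reset and instead overwrite only the rows selected by `self.scene._reset_mask`, so a partial reset of some parallel environments leaves the targets of the others untouched. A minimal sketch of that masked-reset pattern in isolation; the function and variable names here are illustrative, not ManiSkill APIs:

```python
from typing import Optional

import torch


def masked_reset(
    cached: Optional[torch.Tensor],
    fresh: torch.Tensor,
    reset_mask: torch.Tensor,
) -> torch.Tensor:
    """Overwrite only the environments being reset, keeping the rest intact.

    cached:     per-environment buffer carried over between resets, or None on first use
    fresh:      newly computed values for all environments (e.g. current qpos or EE pose)
    reset_mask: boolean tensor of shape (num_envs,) marking environments to reset
    """
    if cached is None:
        # First reset: take a copy so later in-place masked writes are safe.
        return fresh.clone()
    cached[reset_mask] = fresh[reset_mask].clone()
    return cached


# Usage sketch inside a controller's reset():
# self._target_qpos = masked_reset(self._target_qpos, self.qpos, self.scene._reset_mask)
```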