play.py
# Original Code:
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# Modifications:
# Copyright (c) 2024, Irvin Hwang
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint if an RL agent from RL-Games."""
"""Launch Isaac Sim Simulator first."""
import argparse
from omni.isaac.lab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
parser.add_argument(
    "--use_last_checkpoint",
    action="store_true",
    help="When no checkpoint is provided, use the last saved model; otherwise use the best saved model.",
)
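# Example invocation (illustrative only -- the task name, environment count, and checkpoint path
# below are placeholders; use the task IDs registered by this project's `jetbot` package):
#   python play.py --task <Jetbot-Task-Name> --num_envs 32 \
#       --checkpoint logs/rl_games/<experiment_name>/<run>/nn/<model>.pth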
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args()
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import math
import os
import torch
from rl_games.common import env_configurations, vecenv
from rl_games.common.player import BasePlayer
from rl_games.torch_runner import Runner
from omni.isaac.lab.utils.assets import retrieve_file_path
import jetbot
from omni.isaac.lab_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
from omni.isaac.lab_tasks.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
def main():
    """Play with RL-Games agent."""
    # parse env configuration
    env_cfg = parse_env_cfg(
        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")

    # wrap around environment for rl-games
    rl_device = agent_cfg["params"]["config"]["device"]
    clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf)
    clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf)

    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg)
    # wrap around environment for rl-games
    env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)

    # register the environment to rl-games registry
    # note: in agents configuration: environment name must be "rlgpu"
    vecenv.register(
        "IsaacRlgWrapper", lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs)
    )
    env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env})
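    # The "rlgpu" name registered above must match the env name the agent's YAML config expects.
    # An illustrative (assumed) snippet of such a config:
    #   params:
    #     config:
    #       env_name: rlgpu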

    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "rl_games", agent_cfg["params"]["config"]["name"])
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    # find checkpoint
    if args_cli.checkpoint is None:
        # specify directory for logging runs
        run_dir = agent_cfg["params"]["config"].get("full_experiment_name", ".*")
        # specify name of checkpoint
        if args_cli.use_last_checkpoint:
            checkpoint_file = ".*"
        else:
            # this loads the best checkpoint
            checkpoint_file = f"{agent_cfg['params']['config']['name']}.pth"
        # get path to previous checkpoint
        resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file, other_dirs=["nn"])
    else:
        resume_path = retrieve_file_path(args_cli.checkpoint)
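    # With the defaults above, the checkpoint is resolved from the run directories under
    # logs/rl_games/<name>/, looking inside their "nn" subfolder (the layout used when training
    # rl-games agents through Isaac Lab); run_dir and checkpoint_file act as match patterns.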

    # load previously trained model
    agent_cfg["params"]["load_checkpoint"] = True
    agent_cfg["params"]["load_path"] = resume_path
    print(f"[INFO]: Loading model checkpoint from: {agent_cfg['params']['load_path']}")

    # set number of actors into agent config
    agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs
    # create runner from rl-games
    runner = Runner()
    runner.load(agent_cfg)
    # obtain the agent from the runner
    agent: BasePlayer = runner.create_player()
    agent.restore(resume_path)
    agent.reset()

    # reset environment
    obs = env.reset()
    if isinstance(obs, dict):
        obs = obs["obs"]
    # required: enables the flag for batched observations
    _ = agent.get_batch_size(obs, 1)
    # initialize RNN states if used
    if agent.is_rnn:
        agent.init_rnn()

    # simulate environment
    # note: we simplified the logic of the rl-games player (:func:`BasePlayer.run`) to keep complete
    #   control over environment stepping; this drops other operations, such as the masking that
    #   RL-Games uses for multi-agent learning.
    while simulation_app.is_running():
        # run everything in inference mode
        with torch.inference_mode():
            # convert obs to agent format
            obs = agent.obs_to_torch(obs)
            # agent stepping
            actions = agent.get_action(obs, is_deterministic=True)
            # env stepping
            obs, _, dones, _ = env.step(actions)

            # perform operations for terminated episodes
            if len(dones) > 0:
                # reset rnn state for terminated episodes
                if agent.is_rnn and agent.states is not None:
                    for s in agent.states:
                        s[:, dones, :] = 0.0

    # close the simulator
    env.close()


if __name__ == "__main__":
    # run the main function
    main()
    # close sim app
    simulation_app.close()