From 08489b46faffe3bfdeaa1287fa108260e74e194d Mon Sep 17 00:00:00 2001
From: Reggie McLean
Date: Mon, 29 Jul 2024 10:26:27 -0400
Subject: [PATCH] expert actions page

---
 docs/index.md             |  2 +-
 docs/usage/basic_usage.md | 82 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 docs/usage/basic_usage.md

diff --git a/docs/index.md b/docs/index.md
index d236a704b..330d76293 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -43,7 +43,7 @@ obs, reward, terminate, truncate, info = env.step(a)
 
 introduction/basic_usage
 installation/installation
 rendering/rendering
-
+usage/basic_usage
 
 ```

diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md
new file mode 100644
index 000000000..cc2443ff9
--- /dev/null
+++ b/docs/usage/basic_usage.md
@@ -0,0 +1,82 @@
+---
+layout: "contents"
+title: Generate data with expert policies
+firstpage:
+---
+
+# Generate data with expert policies
+
+## Expert Policies
+For each individual environment in Meta-World (e.g. reach, basketball, sweep) there is a scripted expert policy that solves the task. These policies can be used to generate expert data for imitation learning.
+
+## Using Expert Policies
+The example below provides sample code for the reach environment. It can be extended to the ML10/ML45/MT10/MT50 benchmarks by maintaining a mapping from each environment name to its policy, as shown in the sketch after this example.
+
+```python
+from metaworld import MT1
+from metaworld.policies.sawyer_reach_v2_policy import SawyerReachV2Policy as p
+
+mt1 = MT1('reach-v2', seed=42)
+env = mt1.train_classes['reach-v2']()
+env.set_task(mt1.train_tasks[0])
+obs, info = env.reset()
+
+policy = p()
+
+done = False
+
+while not done:
+    a = policy.get_action(obs)
+    obs, _, _, truncated, info = env.step(a)
+    # stop once the task is solved or the episode hits its step limit
+    done = truncated or int(info['success']) == 1
+```
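+
+## Using Expert Policies Across a Benchmark
+The sketch below shows one way to extend the pattern above to MT10 by maintaining a dictionary from environment name to expert policy class. The `POLICIES` mapping and collection loop are illustrative rather than an official Meta-World API; check the class names against `metaworld.policies` in your installed version.
+
+```python
+from metaworld import MT10
+from metaworld.policies import (
+    SawyerButtonPressTopdownV2Policy,
+    SawyerDoorOpenV2Policy,
+    SawyerDrawerCloseV2Policy,
+    SawyerDrawerOpenV2Policy,
+    SawyerPegInsertionSideV2Policy,
+    SawyerPickPlaceV2Policy,
+    SawyerPushV2Policy,
+    SawyerReachV2Policy,
+    SawyerWindowCloseV2Policy,
+    SawyerWindowOpenV2Policy,
+)
+
+# Illustrative mapping from each MT10 environment name to its expert policy.
+POLICIES = {
+    'reach-v2': SawyerReachV2Policy,
+    'push-v2': SawyerPushV2Policy,
+    'pick-place-v2': SawyerPickPlaceV2Policy,
+    'door-open-v2': SawyerDoorOpenV2Policy,
+    'drawer-open-v2': SawyerDrawerOpenV2Policy,
+    'drawer-close-v2': SawyerDrawerCloseV2Policy,
+    'button-press-topdown-v2': SawyerButtonPressTopdownV2Policy,
+    'peg-insert-side-v2': SawyerPegInsertionSideV2Policy,
+    'window-open-v2': SawyerWindowOpenV2Policy,
+    'window-close-v2': SawyerWindowCloseV2Policy,
+}
+
+mt10 = MT10(seed=42)
+
+for name, env_cls in mt10.train_classes.items():
+    env = env_cls()
+    # use the first task variant sampled for this environment
+    task = next(t for t in mt10.train_tasks if t.env_name == name)
+    env.set_task(task)
+    obs, info = env.reset()
+    policy = POLICIES[name]()
+
+    done = False
+    while not done:
+        a = policy.get_action(obs)
+        obs, _, _, truncated, info = env.step(a)
+        done = truncated or int(info['success']) == 1
+```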