Dataset generation for MuJoCo environments #17

Open · wants to merge 24 commits into base: main
scripts/mujoco/create_dataset.py (new file, 115 additions)
import gymnasium as gym
import minari
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO, SAC, TD3
from tqdm import tqdm

# (env_id, proficiency levels, dataset size in steps, training algorithm)
ENV_IDS = [
    ("InvertedPendulum", ("medium", "expert"), 100_000, "SAC"),
    ("InvertedDoublePendulum", ("medium", "expert"), 100_000, "SAC"),
    ("Reacher", ("medium", "expert"), 500_000, "SAC"),
    ("Pusher", ("medium", "expert"), 500_000, "SAC"),
    ("HalfCheetah", ("simple", "medium", "expert"), 1_000_000, "SAC"),
    ("Hopper", ("simple", "medium", "expert"), 1_000_000, "SAC"),
    ("Walker2d", ("simple", "medium", "expert"), 1_000_000, "SAC"),
    ("Swimmer", ("expert",), 1_000_000, "SAC"),  # note the comma: ("expert") is a plain string, not a tuple
    ("Ant", ("simple", "medium"), 1_000_000, "SAC"),
    ("Humanoid", ("simple", "medium", "expert"), 1_000_000, "SAC"),
    # ("HumanoidStandup", ("simple", "medium", "expert"), 1_000_000, "SAC"),
]


def create_dataset_from_policy(dataset_id, collector_env, policy, n_steps: int, algorithm_name):
    truncated = True
    terminated = True
    seed = 123
    for step in tqdm(range(n_steps)):
        if terminated or truncated:
            obs, _ = collector_env.reset(seed=seed)
            seed += 1
            if (n_steps - step) < collector_env.spec.max_episode_steps:  # trim trailing non-full episodes
                break

        action = policy(obs)
        obs, _, terminated, truncated, _ = collector_env.step(action)

    return collector_env.create_dataset(
        dataset_id=f"mujoco/{dataset_id}",
        algorithm_name=f"SB3/{algorithm_name}",
        code_permalink="https://github.com/Farama-Foundation/minari-dataset-generation-scripts",
        author="Kallinteris Andreas",
        author_email="[email protected]",
        requirements=[
            "mujoco==3.2.3",
        ],
    )

Review comment (Member, on lines +37 to +43): you may want to add a description for each dataset. These are encoded in markdown in the documentation. For readability, you can create a folder with all the description files (.md) for each dataset, and here read from the file based on dataset_id.
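A minimal sketch of the reviewer's suggestion; the descriptions/ folder, the file-naming scheme, and the description= keyword of create_dataset() are assumptions for illustration, not part of this PR:

import os

def load_description(dataset_id: str) -> str:
    # Hypothetical helper: reads e.g. descriptions/hopper-expert-v0.md;
    # the folder and naming scheme are assumptions, not PR code.
    filename = dataset_id.replace("/", "-") + ".md"
    with open(os.path.join("descriptions", filename)) as f:
        return f.read()

# ...then, assuming create_dataset() accepts a description argument:
#     description=load_description(dataset_id),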


def load_policy(env_id: str, algo: str, proficiency: str):
    model_checkpoint = load_from_hub(
        repo_id=f"farama-minari/{env_id}-v5-{algo.upper()}-{proficiency}",
        filename=f"{env_id.lower()}-v5-{algo.lower()}-{proficiency}.zip",
    )

    match algo:
        case "SAC":
            policy = SAC.load(model_checkpoint)
        case "TD3":
            policy = TD3.load(model_checkpoint)
        case "PPO":
            policy = PPO.load(model_checkpoint)
        case _:  # without a default case, an unknown algo would leave `policy` unbound
            raise ValueError(f"Unsupported algorithm: {algo}")

    return policy
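Worth noting: SB3's predict() defaults to deterministic=False, so the lambda x: policy.predict(x)[0] calls in the main block below collect stochastic actions. A minimal usage sketch (environment and seed are illustrative only):

policy = load_policy("Hopper", "SAC", "expert")
obs, _ = gym.make("Hopper-v5").reset(seed=0)
action, _ = policy.predict(obs)  # stochastic sample (SB3 default)
greedy_action, _ = policy.predict(obs, deterministic=True)  # mean/greedy action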


if __name__ == "__main__":
    for env_run_spec in ENV_IDS:
        # unpack dataset spec
        env_id, proficiencies, n_steps, algo = env_run_spec

        # make datasets
        if "expert" in proficiencies:
            print(f"\nCREATING EXPERT DATASET FOR {env_id}")
            env = gym.make(f"{env_id}-v5")
            env = minari.DataCollector(env, record_infos=False)  # TODO: record_infos?

            expert_policy = load_policy(env_id, algo, "expert")
            expert_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/expert-v0",
                env,
                lambda x: expert_policy.predict(x)[0],
                n_steps,
                algo,
            )

        if "medium" in proficiencies:
            print(f"\nCREATING MEDIUM DATASET FOR {env_id}")
            env = gym.make(f"{env_id}-v5")
            env = minari.DataCollector(env, record_infos=False)  # TODO: record_infos?

            medium_policy = load_policy(env_id, algo, "medium")
            medium_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/medium-v0",
                env,
                lambda x: medium_policy.predict(x)[0],
                n_steps,
                algo,
            )

        if "simple" in proficiencies:
            print(f"\nCREATING SIMPLE DATASET FOR {env_id}")
            env = gym.make(f"{env_id}-v5")
            env = minari.DataCollector(env, record_infos=False)  # TODO: record_infos?

            simple_policy = load_policy(env_id, algo, "simple")
            simple_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/simple-v0",
                env,
                lambda x: simple_policy.predict(x)[0],
                n_steps,
                algo,
            )
scripts/mujoco/create_dataset_humanoid_standup.py (new file, 142 additions)
import gymnasium as gym
import minari
import numpy as np
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO, SAC, TD3
from tqdm import tqdm

ENV_IDS = [
    ("HumanoidStandup", ("simple", "medium", "expert"), 1_000_000, "SAC"),
]

class AddExcludedObservationElements(minari.StepDataCallback):
    """Add excluded observation elements like `cfrc_ext` back to the observation."""

    def __call__(self, env, **kwargs):
        step_data = super().__call__(env, **kwargs)
        if getattr(env.unwrapped, "_include_cinert_in_observation", True) is False:
            step_data["observation"] = np.concatenate(
                [step_data["observation"], env.unwrapped.data.cinert[1:].flatten()]
            )
        if getattr(env.unwrapped, "_include_cvel_in_observation", True) is False:
            step_data["observation"] = np.concatenate(
                [step_data["observation"], env.unwrapped.data.cvel[1:].flatten()]
            )
        if getattr(env.unwrapped, "_include_qfrc_actuator_in_observation", True) is False:
            step_data["observation"] = np.concatenate(
                [step_data["observation"], env.unwrapped.data.qfrc_actuator[6:].flatten()]
            )
        if getattr(env.unwrapped, "_include_cfrc_ext_in_observation", True) is False:
            step_data["observation"] = np.concatenate(
                [step_data["observation"], env.unwrapped.data.cfrc_ext[1:].flat.copy()]
            )

        return step_data
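For context: HumanoidStandup-v5's full observation is 348-dimensional; with all four exclude flags set, the base observation shrinks and this callback re-appends the missing blocks. A quick sanity-check sketch (the sizes in the comments assume the standard Humanoid model with 13 non-world bodies and nv = 23):

env = gym.make(
    "HumanoidStandup-v5",
    include_cinert_in_observation=False,
    include_cvel_in_observation=False,
    include_qfrc_actuator_in_observation=False,
    include_cfrc_ext_in_observation=False,
)
obs, _ = env.reset(seed=0)
data = env.unwrapped.data
appended = np.concatenate([
    data.cinert[1:].flatten(),         # 13 bodies * 10 = 130
    data.cvel[1:].flatten(),           # 13 bodies * 6  = 78
    data.qfrc_actuator[6:].flatten(),  # nv - 6         = 17
    data.cfrc_ext[1:].flatten(),       # 13 bodies * 6  = 78
])
assert obs.size + appended.size == 348  # matches the DataCollector observation_space below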


def create_dataset_from_policy(dataset_id, collector_env, policy, n_steps: int, algorithm_name):
    truncated = True
    terminated = True
    seed = 123
    for step in tqdm(range(n_steps)):
        if terminated or truncated:
            obs, _ = collector_env.reset(seed=seed)
            seed += 1
            if (n_steps - step) < collector_env.spec.max_episode_steps:  # trim trailing non-full episodes
                break

        action = policy(obs)
        obs, _, terminated, truncated, _ = collector_env.step(action)

    return collector_env.create_dataset(
        dataset_id=f"mujoco/{dataset_id}",
        algorithm_name=f"SB3/{algorithm_name}",
        code_permalink="https://github.com/Farama-Foundation/minari-dataset-generation-scripts",
        author="Kallinteris Andreas",
        author_email="[email protected]",
        requirements=[
            "mujoco==3.2.3",
        ],
    )


def load_policy(env_id: str, algo: str, proficiency: str):
    model_checkpoint = load_from_hub(
        repo_id=f"farama-minari/{env_id}-v5-{algo.upper()}-{proficiency}",
        filename=f"{env_id.lower()}-v5-{algo.lower()}-{proficiency}.zip",
    )

    match algo:
        case "SAC":
            policy = SAC.load(model_checkpoint)
        case "TD3":
            policy = TD3.load(model_checkpoint)
        case "PPO":
            policy = PPO.load(model_checkpoint)
        case _:  # without a default case, an unknown algo would leave `policy` unbound
            raise ValueError(f"Unsupported algorithm: {algo}")

    return policy


def make_env(env_id: str):
    """Wrapper to create the appropriate environment."""
    if env_id == "HumanoidStandup":
        env = gym.make(
            f"{env_id}-v5",
            include_cinert_in_observation=False,
            include_cvel_in_observation=False,
            include_qfrc_actuator_in_observation=False,
            include_cfrc_ext_in_observation=False,
        )
    else:
        env = gym.make(f"{env_id}-v5", render_mode="rgb_array")
    return env


if __name__ == "__main__":
    for env_run_spec in ENV_IDS:
        # unpack dataset spec
        env_id, proficiencies, n_steps, algo = env_run_spec

        # make datasets
        if "expert" in proficiencies:
            print(f"\nCREATING EXPERT DATASET FOR {env_id}")
            env = make_env(env_id)
            env.spec.kwargs = {}  # overwrite the spec kwargs for the dataset, since the callback re-adds the excluded observation elements
            env = minari.DataCollector(env, step_data_callback=AddExcludedObservationElements, record_infos=False, observation_space=gym.spaces.Box(-np.inf, np.inf, (348,), np.float64))  # TODO: record_infos?

            expert_policy = load_policy(env_id, algo, "expert")
            expert_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/expert-v0",
                env,
                lambda x: expert_policy.predict(x)[0],
                n_steps,
                algo,
            )

        if "medium" in proficiencies:
            print(f"\nCREATING MEDIUM DATASET FOR {env_id}")
            env = make_env(env_id)
            env.spec.kwargs = {}  # overwrite the spec kwargs for the dataset, since the callback re-adds the excluded observation elements
            env = minari.DataCollector(env, step_data_callback=AddExcludedObservationElements, record_infos=False, observation_space=gym.spaces.Box(-np.inf, np.inf, (348,), np.float64))  # TODO: record_infos?

            medium_policy = load_policy(env_id, algo, "medium")
            medium_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/medium-v0",
                env,
                lambda x: medium_policy.predict(x)[0],
                n_steps,
                algo,
            )

        if "simple" in proficiencies:
            print(f"\nCREATING SIMPLE DATASET FOR {env_id}")
            env = make_env(env_id)
            env.spec.kwargs = {}  # overwrite the spec kwargs for the dataset, since the callback re-adds the excluded observation elements
            env = minari.DataCollector(env, step_data_callback=AddExcludedObservationElements, record_infos=False, observation_space=gym.spaces.Box(-np.inf, np.inf, (348,), np.float64))  # TODO: record_infos?

            simple_policy = load_policy(env_id, algo, "simple")
            simple_dataset = create_dataset_from_policy(
                f"{env_id.lower()}/simple-v0",
                env,
                lambda x: simple_policy.predict(x)[0],
                n_steps,
                algo,
            )
scripts/mujoco/evaluate_dataset.py (new file, 6 additions)
import minari

for dataset_id in minari.list_local_datasets().keys():
    ds = minari.load_dataset(dataset_id)
    mean_episodic_return = sum(ep.rewards.sum() for ep in ds) / len(ds)
    print(f"{dataset_id} - return: {mean_episodic_return}")
scripts/mujoco/evaluate_policy.py (new file, 30 additions)
import gymnasium as gym
from huggingface_sb3 import load_from_hub
from stable_baselines3 import SAC
from stable_baselines3.common.evaluation import evaluate_policy

ENV_ID = "Ant"
ALGORITHM = "SAC"
# PROFICIENCY = "medium"
PROFICIENCY = "expert-fine-tuned"

# eval_env = gym.make(f"{ENV_ID}-v5")  # plain env; immediately overwritten by the line below
eval_env = gym.make(f"{ENV_ID}-v5", include_cfrc_ext_in_observation=False)

match ALGORITHM:
    case "SAC":
        expert_model_checkpoint = load_from_hub(
            repo_id=f"farama-minari/{ENV_ID}-v5-{ALGORITHM.upper()}-{PROFICIENCY}",
            filename=f"{ENV_ID.lower()}-v5-{ALGORITHM.lower()}-{PROFICIENCY}.zip",
        )
        model = SAC.load(expert_model_checkpoint)
    case "TD3":  # a bare `None` leaves `model` unbound; fail explicitly instead
        raise NotImplementedError
    case "PPO":
        raise NotImplementedError
print("MODEL LOADED")


mean_reward, std_reward = evaluate_policy(model, eval_env, render=False, n_eval_episodes=1000, deterministic=True, warn=False)
print(f"{ENV_ID}-v5/{PROFICIENCY}/{ALGORITHM}")
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")