Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update and fix dependencies related to mac install #1044

Merged
merged 13 commits into from
Feb 6, 2024
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,23 @@ You can also install the dev requirements by adding `--with dev` or the extras
for, say, mujoco and acceleration by [envpool](https://github.com/sail-sg/envpool)
by adding `--extras mujoco envpool`

Available extras are:
- `atari` (for Atari environments)
- `box2d` (for Box2D environments)
- `classic_control` (for classic control (discrete) environments)
- `mujoco` (for MuJoCo environments)
- `mujoco-py` (for legacy mujoco-py environments[^1])
- `pybullet` (for pybullet environments)
- `robotics` (for gymnasium-robotics environments)
- `vizdoom` (for ViZDoom environments)
- `envpool` (for [envpool](https://github.com/sail-sg/envpool/) integration)
- `argparse` (required to run the high-level API examples)

[^1]: `mujoco-py` is a legacy package and is not recommended for new projects.
It is only included for compatibility with older projects.
Also note that there may be compatibility issues with macOS newer than
Monterey.

Otherwise, you can install the latest release from PyPI (currently
far behind the master branch) with the following command:

Expand Down Expand Up @@ -216,6 +233,8 @@ We shall apply the deep Q network (DQN) learning algorithm using both APIs.

### High-Level API

The high-level API requires the `argparse` extra, which can be installed
by adding `--extras argparse`.
To get started, we need some imports.

```python
Expand Down
2 changes: 1 addition & 1 deletion examples/box2d/acrobot_dualdqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--dueling-q-hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--dueling-v-hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--training-num", type=int, default=10)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--logdir", type=str, default="log")
parser.add_argument("--render", type=float, default=0.0)
parser.add_argument(
Expand Down
9 changes: 5 additions & 4 deletions examples/box2d/bipedal_bdq.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,11 @@ def test_bdq(args: argparse.Namespace = get_args()) -> None:
device=args.device,
).to(args.device)
optim = torch.optim.Adam(net.parameters(), lr=args.lr)
policy: BranchingDQNPolicy = BranchingDQNPolicy(
net,
optim,
args.gamma,
policy = BranchingDQNPolicy(
model=net,
optim=optim,
discount_factor=args.gamma,
action_space=env.action_space,
target_update_freq=args.target_update_freq,
)
# collector
Expand Down
2 changes: 1 addition & 1 deletion examples/box2d/bipedal_hardcore_sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--batch-size", type=int, default=128)
parser.add_argument("--hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--training-num", type=int, default=10)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--logdir", type=str, default="log")
parser.add_argument("--render", type=float, default=0.0)
parser.add_argument("--n-step", type=int, default=4)
Expand Down
2 changes: 1 addition & 1 deletion examples/box2d/lunarlander_dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--dueling-q-hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--dueling-v-hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--training-num", type=int, default=16)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--logdir", type=str, default="log")
parser.add_argument("--render", type=float, default=0.0)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion examples/box2d/mcc_sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--batch-size", type=int, default=128)
parser.add_argument("--hidden-sizes", type=int, nargs="*", default=[128, 128])
parser.add_argument("--training-num", type=int, default=5)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--logdir", type=str, default="log")
parser.add_argument("--render", type=float, default=0.0)
parser.add_argument(
Expand Down
5 changes: 1 addition & 4 deletions examples/mujoco/fetch_her_ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import gymnasium as gym
import numpy as np
import torch
import wandb
from torch.utils.tensorboard import SummaryWriter


Expand All @@ -32,7 +31,7 @@

def get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="FetchReach-v3")
parser.add_argument("--task", type=str, default="FetchReach-v2")
MischaPanch marked this conversation as resolved.
Show resolved Hide resolved
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--buffer-size", type=int, default=100000)
parser.add_argument("--hidden-sizes", type=int, nargs="*", default=[256, 256])
Expand Down Expand Up @@ -105,8 +104,6 @@ def test_ddpg(args: argparse.Namespace = get_args()) -> None:
config=args,
project=args.wandb_project,
)
logger.wandb_run.config.setdefaults(vars(args))
args = argparse.Namespace(**wandb.config)
writer = SummaryWriter(log_path)
writer.add_text("args", str(args))
if args.logger == "tensorboard":
Expand Down
2 changes: 1 addition & 1 deletion examples/mujoco/mujoco_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--step-per-collect", type=int, default=2048)
parser.add_argument("--repeat-per-collect", type=int, default=10)
parser.add_argument("--batch-size", type=int, default=64)
parser.add_argument("--training-num", type=int, default=64)
parser.add_argument("--training-num", type=int, default=8)
parser.add_argument("--test-num", type=int, default=10)
# ppo special
parser.add_argument("--rew-norm", type=int, default=True)
Expand Down
2 changes: 1 addition & 1 deletion examples/mujoco/mujoco_ppo_hl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def main(
step_per_collect: int = 2048,
repeat_per_collect: int = 10,
batch_size: int = 64,
training_num: int = 64,
training_num: int = 10,
test_num: int = 10,
rew_norm: bool = True,
vf_coef: float = 0.25,
Expand Down
2 changes: 1 addition & 1 deletion examples/mujoco/mujoco_reinforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--repeat-per-collect", type=int, default=1)
# batch-size >> step-per-collect means calculating all data in one single forward pass.
parser.add_argument("--batch-size", type=int, default=None)
parser.add_argument("--training-num", type=int, default=64)
parser.add_argument("--training-num", type=int, default=10)
parser.add_argument("--test-num", type=int, default=10)
# reinforce special
parser.add_argument("--rew-norm", type=int, default=True)
Expand Down
2 changes: 1 addition & 1 deletion examples/mujoco/mujoco_reinforce_hl.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main(
step_per_collect: int = 2048,
repeat_per_collect: int = 1,
batch_size: int | None = None,
training_num: int = 64,
training_num: int = 10,
test_num: int = 10,
rew_norm: bool = True,
action_bound_method: Literal["clip", "tanh"] = "tanh",
Expand Down
2 changes: 1 addition & 1 deletion examples/vizdoom/vizdoom_c51.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--update-per-step", type=float, default=0.1)
parser.add_argument("--batch-size", type=int, default=64)
parser.add_argument("--training-num", type=int, default=10)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--logdir", type=str, default="log")
parser.add_argument("--render", type=float, default=0.0)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion examples/vizdoom/vizdoom_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_args() -> argparse.Namespace:
parser.add_argument("--batch-size", type=int, default=256)
parser.add_argument("--hidden-size", type=int, default=512)
parser.add_argument("--training-num", type=int, default=10)
parser.add_argument("--test-num", type=int, default=100)
parser.add_argument("--test-num", type=int, default=10)
parser.add_argument("--rew-norm", type=int, default=False)
parser.add_argument("--vf-coef", type=float, default=0.5)
parser.add_argument("--ent-coef", type=float, default=0.01)
Expand Down
Loading
Loading