feat: add reinforce with baseline

EdanToledo · Mar 8, 2024 · b283400 · b283400
1 parent 10bba9f
commit b283400
Show file tree

Hide file tree

Showing 11 changed files with 680 additions and 8 deletions.
diff --git a/stoix/configs/default_ff_reinforce.yaml b/stoix/configs/default_ff_reinforce.yaml
@@ -0,0 +1,7 @@
+defaults:
+  - logger: ff_reinforce
+  - arch: anakin
+  - system: ff_reinforce
+  - network: mlp
+  - env: gymnax/cartpole
+  - _self_
diff --git a/stoix/configs/env/debug/identity.yaml b/stoix/configs/env/debug/identity.yaml
@@ -0,0 +1,9 @@
+# ---Environment Configs---
+env_name: debug
+
+scenario:
+  name: debug
+  task_name: debug
+
+kwargs:
+  num_actions: 4
diff --git a/stoix/configs/env/gymnax/acrobot.yaml b/stoix/configs/env/gymnax/acrobot.yaml
@@ -0,0 +1,8 @@
+# ---Environment Configs---
+env_name: gymnax
+
+scenario:
+  name: Acrobot-v1
+  task_name: acrobot
+
+kwargs: {}
diff --git a/stoix/configs/env/gymnax/pendulum.yaml b/stoix/configs/env/gymnax/pendulum.yaml
@@ -0,0 +1,8 @@
+# ---Environment Configs---
+env_name: gymnax
+
+scenario:
+  name: Pendulum-v1
+  task_name: pendulum
+
+kwargs: {}
diff --git a/stoix/configs/logger/ff_reinforce.yaml b/stoix/configs/logger/ff_reinforce.yaml
@@ -0,0 +1,4 @@
+defaults:
+    - base_logger
+
+system_name: ff_reinforce
diff --git a/stoix/configs/system/ff_reinforce.yaml b/stoix/configs/system/ff_reinforce.yaml
@@ -0,0 +1,17 @@
+# --- Defaults FF-REINFORCE ---
+
+total_timesteps: 1e8 # Set the total environment steps.
+# If total_timesteps is unspecified, it is derived from num_updates; otherwise, num_updates is derived from total_timesteps.
+num_updates: ~ # Number of updates
+seed: 42
+
+# --- RL hyperparameters ---
+actor_lr: 3e-4 # Learning rate for actor network
+critic_lr: 3e-4 # Learning rate for critic network
+update_batch_size: 1 # Number of vectorised gradient updates per device.
+rollout_length: 16 # Number of environment steps per vectorised environment.
+gamma: 0.99 # Discounting factor.
+ent_coef: 0.001 # Entropy regularisation term for loss function.
+vf_coef: 1.0 # Critic (value function) loss weight in the overall loss function.
+max_grad_norm: 0.5 # Maximum norm of the gradients for a weight update.
+decay_learning_rates: False # Whether learning rates should be linearly decayed during training.