Skip to content

Commit

Permalink
Enable GUI rollout
Browse files Browse the repository at this point in the history
Signed-off-by: Arjo Chakravarty <[email protected]>
  • Loading branch information
arjo129 committed Mar 5, 2025
1 parent 856f316 commit f8ddbf6
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Example for Reinforcement Learning (RL) With Gazebo

This demo world shows you an example of how you can use SDFormat, Ray-RLLIB and Gazebo to perform RL with python.
This demo world shows you an example of how you can use SDFormat, Stable Baselines 3 and Gazebo to perform RL with python.
We start with a very simple cart-pole world. This world is defined in our sdf file `cart_pole.sdf`. It is analogous to
the

Expand All @@ -10,7 +10,7 @@ First create a virtual environment using python,
```
python3 -m venv venv
```
Lets activate it and install rayrllib and pytorch.
Lets activate it and install stablebaselines3 and pytorch.
```
. venv/bin/activate
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np

from gz.common6 import set_verbosity
from gz.sim9 import TestFixture, World, world_entity, Model, Link, run_gui
from gz.sim10 import TestFixture, World, world_entity, Model, Link, get_install_prefix
from gz.math8 import Vector3d
from gz.transport14 import Node
from gz.msgs11.world_control_pb2 import WorldControl
Expand All @@ -13,9 +13,17 @@

from stable_baselines3 import PPO
import time
import subprocess

file_path = os.path.dirname(os.path.realpath(__file__))

def run_gui():
if os.name == 'nt':
base = os.path.join(get_install_prefix(), "libexec", "runGui.exe")
else:
base = os.path.join(get_install_prefix(), "libexec", "runGui")
subprocess.Popen(base)

class GzRewardScorer:
def __init__(self):
self.fixture = TestFixture(os.path.join(file_path, 'cart_pole.sdf'))
Expand Down Expand Up @@ -99,13 +107,13 @@ def step(self, action):
obs, reward, done, truncated, info = self.env.step(action)
return obs, reward, done, truncated, info


env = CustomCartPole({})
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=25_000)

vec_env = model.get_env()
obs = vec_env.reset()

run_gui()
time.sleep(10)
for i in range(50000):
Expand Down

0 comments on commit f8ddbf6

Please sign in to comment.