diff --git a/README.md b/README.md
index 203a0de1..d0fbcae9 100644
--- a/README.md
+++ b/README.md
@@ -18,5 +18,5 @@ Clone the repo and run `pre-commit install` to set up the pre-commit hooks.
 3. Define the factory functions that create your class: `parallel_env` returns a parallel version of the env, `env` returns an AEC version, and `raw_env` is the pure class constructor (it is not used in practice). (!) Use the conversions defined inside our repository, e.g. `mo_parallel_to_aec` instead of `parallel_to_aec` from PZ.
 4. (!) Do not use `OrderEnforcingWrapper`; it prevents accessing the `reward_space` of the env :-(
 5. Add a versioned constructor of your env in the directory which exports the factory functions (see `mobeach_v0.py` for an example).
-6. Add your environment to the tests in `tests/all_modules.py`
+6. Add your environment to the tests in `utils/all_modules.py`
 7. Run `pytest` to check that everything works
diff --git a/momadm_benchmarks/envs/crazyrl/catch/__init__.py b/momadm_benchmarks/envs/crazyrl/catch/__init__.py
new file mode 100644
index 00000000..71923133
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/catch/__init__.py
@@ -0,0 +1 @@
+"""Catch environment for multi-agent reinforcement learning."""
diff --git a/momadm_benchmarks/envs/crazyrl/catch/catch.py b/momadm_benchmarks/envs/crazyrl/catch/catch.py
new file mode 100644
index 00000000..868e6129
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/catch/catch.py
@@ -0,0 +1,130 @@
+"""Catch environment for Crazyflie 2. Each agent is supposed to learn to surround a common target point that tries to escape."""
+
+from typing_extensions import override
+
+import numpy as np
+from pettingzoo.utils.wrappers import AssertOutOfBoundsWrapper
+
+from momadm_benchmarks.envs.crazyrl.crazyRL_base import FPS, CrazyRLBaseParallelEnv
+from momadm_benchmarks.utils.conversions import mo_parallel_to_aec
+
+
+def env(*args, **kwargs):
+    """Returns the wrapped environment in `AEC` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A wrapped AEC env.
+    """
+    env = raw_env(*args, **kwargs)
+    env = mo_parallel_to_aec(env)
+    env = AssertOutOfBoundsWrapper(env)
+    return env
+
+
+def parallel_env(*args, **kwargs):
+    """Returns the wrapped env in `parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A parallel env.
+    """
+    env = raw_env(*args, **kwargs)
+    return env
+
+
+def raw_env(*args, **kwargs):
+    """Returns the environment in `Parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to create the `Catch` environment.
+
+    Returns:
+        A raw env.
+    """
+    return Catch(*args, **kwargs)
+
+
+class Catch(CrazyRLBaseParallelEnv):
+    """A Parallel Environment where drones learn how to surround a moving target that tries to escape."""
+
+    metadata = {"render_modes": ["human"], "name": "mocatch_v0", "is_parallelizable": True, "render_fps": FPS}
+
+    @override
+    def __init__(self, *args, target_speed=0.1, **kwargs):
+        """Catch environment in CrazyRL.
+
+        Args:
+            render_mode (str, optional): The mode to display the rendering of the environment. Can be human or None.
+            size (int, optional): Size of the area sides
+            num_drones: number of drones
+            init_flying_pos: 2d array containing the initial coordinates of the agents;
+                each row is a (3)-shaped array with the initial XYZ position of one drone.
+ init_target_location: Array of the initial position of the moving target + target_speed: Distance traveled by the target at each timestep + """ + + super().__init__(*args, **kwargs) + self.target_speed = target_speed + + def _move_target(self): + # mean of the agent's positions + mean = np.array([0, 0, 0]) + for agent in self.agents: + mean = mean + self.agent_location[agent] + + mean = mean / self.num_drones + + dist = np.linalg.norm(mean - self.target_location) + self.target_location = self.target_location.copy() + + # go to the opposite direction of the mean of the agents + if dist > 0.2: + self.target_location += (self.target_location - mean) / dist * self.target_speed + + # if the mean of the agents is too close to the target, move the target in a random direction, slowly because + # it hesitates + else: + self.target_location += np.random.random_sample(3) * self.target_speed * 0.1 + + # if the target is out of the map, put it back in the map + np.clip( + self.target_location, + [-self.size, -self.size, 0.2], + [self.size, self.size, 3], + out=self.target_location, + ) + + @override + def _transition_state(self, actions): + target_point_action = dict() + state = self.agent_location + + # new targets + self.previous_target = self.target_location.copy() + self._move_target() + + for agent in self.agents: + # Actions are clipped to stay in the map and scaled to do max 20cm in one step + target_point_action[agent] = np.clip( + state[agent] + actions[agent] * 0.2, [-self.size, -self.size, 0], [self.size, self.size, 3] + ) + + return target_point_action + + +if __name__ == "__main__": + prll_env = Catch(render_mode="human") + + observations, infos = prll_env.reset() + + while prll_env.agents: + actions = { + agent: prll_env.action_space(agent).sample() for agent in prll_env.agents + } # this is where you would insert your policy + observations, rewards, terminations, truncations, infos = prll_env.step(actions) + prll_env.render() diff --git a/momadm_benchmarks/envs/crazyrl/catch/mocatch_v0.py b/momadm_benchmarks/envs/crazyrl/catch/mocatch_v0.py new file mode 100644 index 00000000..7de8b41e --- /dev/null +++ b/momadm_benchmarks/envs/crazyrl/catch/mocatch_v0.py @@ -0,0 +1,5 @@ +"""CrazyRL/Catch environment for MOMARL.""" +from momadm_benchmarks.envs.crazyrl.catch.catch import env, parallel_env, raw_env + + +__all__ = ["env", "parallel_env", "raw_env"] diff --git a/momadm_benchmarks/envs/crazyrl/crazyRL_base.py b/momadm_benchmarks/envs/crazyrl/crazyRL_base.py new file mode 100644 index 00000000..de1a1921 --- /dev/null +++ b/momadm_benchmarks/envs/crazyrl/crazyRL_base.py @@ -0,0 +1,399 @@ +"""The Base environment inheriting from pettingZoo Parallel environment class.""" +from copy import copy +from typing import Optional +from typing_extensions import override + +import numpy as np +import numpy.typing as npt +import pygame +from gymnasium import spaces +from OpenGL.GL import ( + GL_AMBIENT, + GL_AMBIENT_AND_DIFFUSE, + GL_BLEND, + GL_COLOR_BUFFER_BIT, + GL_COLOR_MATERIAL, + GL_DEPTH_BUFFER_BIT, + GL_DEPTH_TEST, + GL_DIFFUSE, + GL_FRONT_AND_BACK, + GL_LIGHT0, + GL_LIGHTING, + GL_MODELVIEW, + GL_MODELVIEW_MATRIX, + GL_ONE_MINUS_SRC_ALPHA, + GL_POSITION, + GL_PROJECTION, + GL_SMOOTH, + GL_SRC_ALPHA, + glBlendFunc, + glClear, + glColor4f, + glColorMaterial, + glEnable, + glGetFloatv, + glLight, + glLightfv, + glLineWidth, + glLoadIdentity, + glMatrixMode, + glMultMatrixf, + glPopMatrix, + glPushMatrix, + glShadeModel, +) +from OpenGL.raw.GLU import gluLookAt, gluPerspective +from pygame import 
DOUBLEBUF, OPENGL + +from momadm_benchmarks.envs.crazyrl.gl_utils import axes, field, point, target_point +from momadm_benchmarks.utils.env import MOParallelEnv + + +def _distance_to_target(agent_location: npt.NDArray[np.float32], target_location: npt.NDArray[np.float32]) -> float: + return np.linalg.norm(agent_location - target_location) + + +CLOSENESS_THRESHOLD = 0.1 +FPS = 20 + + +class CrazyRLBaseParallelEnv(MOParallelEnv): + """The Base environment inheriting from pettingZoo Parallel environment class. + + The main API methods of this class are: + - step + - reset + - render + - close + - state + + they are defined in this main environment, as well as the following attributes: + action_space: The Space object corresponding to valid actions + observation_space: The Space object corresponding to valid observations + reward_space: The Space object corresponding to valid rewards + """ + + metadata = { + "render_modes": ["human"], + "is_parallelizable": True, + "render_fps": FPS, + } + + def __init__( + self, + render_mode: Optional[str] = None, + size: int = 3, + num_drones: int = 4, + init_flying_pos=np.array([[0, 0, 1], [1, 1, 1], [0, 1, 1], [2, 2, 1]]), + init_target_location=np.array([1, 1, 2.5]), + ): + """Initialization of a CrazyRL environment. + + Args: + render_mode (str, optional): The mode to display the rendering of the environment. Can be human or None. + size (int, optional): Size of the area sides + num_drones: amount of drones + init_flying_pos: 2d array containing the coordinates of the agents + is a (3)-shaped array containing the initial XYZ position of the drones. + init_target_location: Array of the initial position of the moving target + """ + self.num_drones = num_drones + self.agents_names = np.array(["agent_" + str(i) for i in range(self.num_drones)]) + self.size = size # The size of the square grid + + # locations + self.init_flying_pos = {agent: init_flying_pos[i].copy() for i, agent in enumerate(self.agents_names)} + self.agent_location = self.init_flying_pos.copy() + self.previous_location = self.init_flying_pos.copy() # for potential based reward + + # targets + self.init_target_location = init_target_location.copy() + self.target_location = init_target_location.copy() + self.previous_target = init_target_location.copy() + + self.possible_agents = self.agents_names.tolist() + self.timestep = 0 + self.agents = [] + self.size = size + + # spaces + self.action_spaces = dict(zip(self.agents_names, [self._action_space() for agent in self.agents_names])) + self.observation_spaces = dict(zip(self.agents_names, [self._observation_space() for agent in self.agents_names])) + self.reward_spaces = dict(zip(self.agents_names, [self._reward_space() for agent in self.agents_names])) + + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + + if self.render_mode == "human": + self.window_size = 900 # The size of the PyGame window + self.window = None + self.clock = None + + def _observation_space(self): + return spaces.Box( + low=np.tile(np.array([-self.size, -self.size, 0], dtype=np.float32), self.num_drones + 1), + high=np.tile(np.array([self.size, self.size, 3], dtype=np.float32), self.num_drones + 1), + shape=(3 * (self.num_drones + 1),), # coordinates of the drones and the target + dtype=np.float32, + ) + + def _action_space(self): + return spaces.Box(low=-1 * np.ones(3, dtype=np.float32), high=np.ones(3, dtype=np.float32), dtype=np.float32) + + def _reward_space(self): + return spaces.Box( + low=np.array([-10, 
-10], dtype=np.float32), + high=np.array([1, np.inf], dtype=np.float32), + shape=(2,), + dtype=np.float32, + ) + + def action_space(self, agent): + """Returns the action space for the given agent.""" + return self.action_spaces[agent] + + def observation_space(self, agent): + """Returns the observation space for the given agent.""" + return self.observation_spaces[agent] + + def reward_space(self, agent): + """Returns the reward space for the given agent.""" + return self.reward_spaces[agent] + + def _transition_state(self, action): + """Computes the action passed to `.step()` into action matching the mode environment. Must be implemented in a subclass. + + Args: + action : ndarray | dict[..]. The input action for one drones + """ + raise NotImplementedError + + def _compute_obs(self): + obs = dict() + + for agent in self.agents_names: + obs[agent] = self.agent_location[agent].copy() + obs[agent] = np.append(obs[agent], self.target_location) + + for other_agent in self.agents_names: + if other_agent != agent: + obs[agent] = np.append(obs[agent], self.agent_location[other_agent]) + obs[agent] = np.array(obs[agent], dtype=(np.float32)) + + return obs + + def _compute_reward(self): + reward = dict() + + for agent in self.agents_names: + reward_far_from_other_agents = 0 + reward_close_to_target = 0 + + # mean distance to the other agents + for other_agent in self.agents_names: + if other_agent != agent: + reward_far_from_other_agents += np.linalg.norm( + self.agent_location[agent] - self.agent_location[other_agent] + ) + + reward_far_from_other_agents /= self.num_drones - 1 + + # distance to the target + # (!) targets and locations must be updated before this + dist_from_old_target = _distance_to_target(self.agent_location[agent], self.previous_target) + old_dist = _distance_to_target(self.previous_location[agent], self.previous_target) + + # reward should be new_potential - old_potential but since the distances should be negated we reversed the signs + # -new_potential - (-old_potential) = old_potential - new_potential + reward_close_to_target = old_dist - dist_from_old_target + + # collision between two drones + for other_agent in self.agents_names: + if other_agent != agent and ( + np.linalg.norm(self.agent_location[agent] - self.agent_location[other_agent]) < CLOSENESS_THRESHOLD + ): + reward_far_from_other_agents = -10 + reward_close_to_target = -10 + + # collision with the ground or the target + if ( + self.agent_location[agent][2] < CLOSENESS_THRESHOLD + or np.linalg.norm(self.agent_location[agent] - self.target_location) < CLOSENESS_THRESHOLD + ): + reward_far_from_other_agents = -10 + reward_close_to_target = -10 + + reward[agent] = np.array([reward_close_to_target, reward_far_from_other_agents], dtype=np.float32) + + return reward + + def _compute_terminated(self): + terminated = dict() + + for agent in self.agents: + terminated[agent] = False + + for agent in self.agents: + # collision between two drones + for other_agent in self.agents: + if other_agent != agent: + terminated[agent] = terminated[agent] or ( + np.linalg.norm(self.agent_location[agent] - self.agent_location[other_agent]) < CLOSENESS_THRESHOLD + ) + + # collision with the ground + terminated[agent] = terminated[agent] or (self.agent_location[agent][2] < CLOSENESS_THRESHOLD) + + # collision with the target + terminated[agent] = terminated[agent] or ( + np.linalg.norm(self.agent_location[agent] - self.target_location) < CLOSENESS_THRESHOLD + ) + + if terminated[agent]: + for other_agent in self.agents: + 
terminated[other_agent] = True + self.agents = [] + + terminated[agent] = bool(terminated[agent]) + + return terminated + + def _compute_truncation(self): + if self.timestep == 200: + truncation = {agent: True for agent in self.agents_names} + self.agents = [] + self.timestep = 0 + else: + truncation = {agent: False for agent in self.agents_names} + return truncation + + def _compute_info(self): + info = dict() + for agent in self.agents_names: + info[agent] = {} + return info + + # PettingZoo API + @override + def reset(self, seed=None, return_info=False, options=None): + self.timestep = 0 + self.agents = copy(self.possible_agents) + self.target_location = self.init_target_location.copy() + self.previous_target = self.init_target_location.copy() + + self.agent_location = self.init_flying_pos.copy() + self.previous_location = self.init_flying_pos.copy() + + observation = self._compute_obs() + infos = self._compute_info() + + if self.render_mode == "human": + self.render() + return observation, infos + + @override + def step(self, actions): + self.timestep += 1 + + new_locations = self._transition_state(actions) + self.previous_location = self.agent_location + self.agent_location = new_locations + + if self.render_mode == "human": + self.render() + + observations = self._compute_obs() + rewards = self._compute_reward() + terminations = self._compute_terminated() + truncations = self._compute_truncation() + infos = self._compute_info() + + return observations, rewards, terminations, truncations, infos + + @override + def render(self): + """Renders the current frame of the environment. Only works in human rendering mode.""" + + def init_window(): + """Initializes the PyGame window.""" + pygame.init() + pygame.display.init() + pygame.display.set_caption("Crazy RL") + + self.window = pygame.display.set_mode((self.window_size, self.window_size), DOUBLEBUF | OPENGL) + + glEnable(GL_DEPTH_TEST) + glEnable(GL_LIGHTING) + glShadeModel(GL_SMOOTH) + glEnable(GL_COLOR_MATERIAL) + glColorMaterial(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE) + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) + glEnable(GL_BLEND) + glLineWidth(1.5) + + glEnable(GL_LIGHT0) + glLightfv(GL_LIGHT0, GL_AMBIENT, [0.5, 0.5, 0.5, 1]) + glLightfv(GL_LIGHT0, GL_DIFFUSE, [1.0, 1.0, 1.0, 1]) + + glMatrixMode(GL_PROJECTION) + gluPerspective(75, (self.window_size / self.window_size), 0.1, 50.0) + + glMatrixMode(GL_MODELVIEW) + gluLookAt(3, -11, 3, 0, 0, 0, 0, 0, 1) + + self.viewMatrix = glGetFloatv(GL_MODELVIEW_MATRIX) + glLoadIdentity() + + if self.window is None: + init_window() + + # if self.clock is None and self.render_mode == "human": + self.clock = pygame.time.Clock() + + glLoadIdentity() + + # init the view matrix + glPushMatrix() + glLoadIdentity() + + # multiply the current matrix by the get the new view matrix and store the final view matrix + glMultMatrixf(self.viewMatrix) + self.viewMatrix = glGetFloatv(GL_MODELVIEW_MATRIX) + + # apply view matrix + glPopMatrix() + glMultMatrixf(self.viewMatrix) + + glLight(GL_LIGHT0, GL_POSITION, (-1, -1, 5, 1)) # point light from the left, top, front + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) + + for agent in self.agent_location.values(): + glPushMatrix() + point(np.array([agent[0], agent[1], agent[2]])) + + glPopMatrix() + + glColor4f(0.5, 0.5, 0.5, 1) + field(self.size) + axes() + + # for target in self.target_location: + glPushMatrix() + target_point(np.array([self.target_location[0], self.target_location[1], self.target_location[2]])) + glPopMatrix() + + 
pygame.event.pump()
+        pygame.display.flip()
+
+    @override
+    def state(self):
+        """Returns a global view of the environment: the concatenated observations of all possible agents."""
+        states = tuple(self._compute_obs()[agent].astype(np.float32) for agent in self.possible_agents)
+        return np.concatenate(states, axis=None)
+
+    @override
+    def close(self):
+        """Closes the PyGame window if the environment was rendered."""
+        if self.render_mode == "human":
+            if self.window is not None:
+                pygame.display.quit()
+                pygame.quit()
diff --git a/momadm_benchmarks/envs/crazyrl/escort/__init__.py b/momadm_benchmarks/envs/crazyrl/escort/__init__.py
new file mode 100644
index 00000000..20ec6077
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/escort/__init__.py
@@ -0,0 +1 @@
+"""Escort environment for multi-agent reinforcement learning."""
diff --git a/momadm_benchmarks/envs/crazyrl/escort/escort.py b/momadm_benchmarks/envs/crazyrl/escort/escort.py
new file mode 100644
index 00000000..5d5cb4c5
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/escort/escort.py
@@ -0,0 +1,120 @@
+"""Escort environment for Crazyflie 2. Each agent is supposed to learn to surround a common target point moving from one point to another."""
+
+from typing_extensions import override
+
+import numpy as np
+from pettingzoo.utils.wrappers import AssertOutOfBoundsWrapper
+
+from momadm_benchmarks.envs.crazyrl.crazyRL_base import FPS, CrazyRLBaseParallelEnv
+from momadm_benchmarks.utils.conversions import mo_parallel_to_aec
+
+
+def env(*args, **kwargs):
+    """Returns the wrapped environment in `AEC` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A wrapped AEC env.
+    """
+    env = raw_env(*args, **kwargs)
+    env = mo_parallel_to_aec(env)
+    env = AssertOutOfBoundsWrapper(env)
+    return env
+
+
+def parallel_env(*args, **kwargs):
+    """Returns the wrapped env in `parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A parallel env.
+    """
+    env = raw_env(*args, **kwargs)
+    return env
+
+
+def raw_env(*args, **kwargs):
+    """Returns the environment in `Parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to create the `Escort` environment.
+
+    Returns:
+        A raw env.
+    """
+    return Escort(*args, **kwargs)
+
+
+class Escort(CrazyRLBaseParallelEnv):
+    """A Parallel Environment where drones learn how to surround a moving target that goes straight from one point to another."""
+
+    metadata = {"render_modes": ["human"], "name": "moescort_v0", "is_parallelizable": True, "render_fps": FPS}
+
+    def __init__(self, *args, num_intermediate_points: int = 50, final_target_location=np.array([-2, -2, 3]), **kwargs):
+        """Escort environment in CrazyRL.
+
+        Args:
+            render_mode (str, optional): The mode to display the rendering of the environment. Can be human or None.
+            size (int, optional): Size of the area sides
+            num_drones: number of drones
+            init_flying_pos: 2d array containing the initial coordinates of the agents;
+                each row is a (3)-shaped array with the initial XYZ position of one drone.
+            init_target_location: A (3)-shaped array for the initial XYZ position of the target.
+ final_target_location: Array of the final position of the moving target + num_intermediate_points: Number of intermediate points in the target trajectory + """ + self.final_target_location = final_target_location + + super().__init__(*args, **kwargs) + + # There are two more ref points than intermediate points, one for the initial and final target locations + self.num_ref_points = num_intermediate_points + 2 + # Ref is a 2d arrays for the target + # it contains the reference points (xyz) for the target at each timestep + self.ref: np.ndarray = np.array([self.init_target_location]) + + for t in range(1, self.num_ref_points): + self.ref = np.append( + self.ref, + [ + self.init_target_location + + (self.final_target_location - self.init_target_location) * t / self.num_ref_points + ], + axis=0, + ) + + @override + def _transition_state(self, actions): + target_point_action = dict() + state = self.agent_location + # new targets + self.previous_target = self.target_location.copy() + if self.timestep < self.num_ref_points: + self.target_location = self.ref[self.timestep] + else: # the target has stopped + self.target_location = self.ref[-1] + + for agent in self.agents: + # Actions are clipped to stay in the map and scaled to do max 20cm in one step + target_point_action[agent] = np.clip( + state[agent] + actions[agent] * 0.2, [-self.size, -self.size, 0], [self.size, self.size, 3] + ) + + return target_point_action + + +if __name__ == "__main__": + prll_env = Escort(render_mode="human") + + observations, infos = prll_env.reset() + + while prll_env.agents: + actions = { + agent: prll_env.action_space(agent).sample() for agent in prll_env.agents + } # this is where you would insert your policy + observations, rewards, terminations, truncations, infos = prll_env.step(actions) + prll_env.render() diff --git a/momadm_benchmarks/envs/crazyrl/escort/moescort_v0.py b/momadm_benchmarks/envs/crazyrl/escort/moescort_v0.py new file mode 100644 index 00000000..f8b6927c --- /dev/null +++ b/momadm_benchmarks/envs/crazyrl/escort/moescort_v0.py @@ -0,0 +1,5 @@ +"""CrazyRL/Escort environment for MOMARL.""" +from momadm_benchmarks.envs.crazyrl.escort.escort import env, parallel_env, raw_env + + +__all__ = ["env", "parallel_env", "raw_env"] diff --git a/momadm_benchmarks/envs/crazyrl/gl_utils.py b/momadm_benchmarks/envs/crazyrl/gl_utils.py new file mode 100644 index 00000000..057a59c2 --- /dev/null +++ b/momadm_benchmarks/envs/crazyrl/gl_utils.py @@ -0,0 +1,95 @@ +"""Graphical representation of the UAV in 3D space. the reference is based on the Crazyflie position reference: https://www.bitcraze.io/documentation/repository/crazyflie-firmware/master/functional-areas/lighthouse/terminology_definitions/ .""" +import numpy as np +from OpenGL.GL import ( + GL_LINES, + GL_QUADS, + glBegin, + glColor3f, + glColor4f, + glEnd, + glTranslatef, + glVertex3f, + glVertex3fv, +) +from OpenGL.GLU import gluNewQuadric +from OpenGL.raw.GLU import gluSphere + + +def axes(): + """Draw axes on the opengl simulation view.""" + glBegin(GL_LINES) + + glColor3f(0, 0, 1.0) + glVertex3fv((0, 0, -2)) + glVertex3fv((0, 0, -1)) + + glColor3f(0, 1.0, 0) + glVertex3fv((0, 0, -1.98)) + glVertex3fv((-1, 0, -1.98)) + + glColor3f(1.0, 0, 0) + glVertex3fv((0, 0, -1.98)) + glVertex3fv((0, 1, -1.98)) + + glEnd() + + +def field(size): + """Draw the field on the opengl simulation view. 
+
+    Args:
+        size: int, the length of the sides of the field
+    """
+    glBegin(GL_QUADS)
+    glVertex3f(-size, -size, -2)
+    glVertex3f(size, -size, -2)
+    glVertex3f(size, size, -2)
+    glVertex3f(-size, size, -2)
+    glEnd()
+
+    glColor3f(1.0, 1.0, 1.0)
+    glBegin(GL_LINES)
+    for i in np.arange(-size, size, 1):
+        glVertex3f(-size, i, -1.99)
+        glVertex3f(size, i, -1.99)
+
+        glVertex3f(i, size, -1.99)
+        glVertex3f(i, -size, -1.99)
+
+    glEnd()
+
+
+def point(point):
+    """Draw the drone as a little red dot with a stick, to better visualize its projection on the grid.
+
+    Args:
+        point: tuple x,y,z position
+    """
+    sphere = gluNewQuadric()
+    glTranslatef(-point[1], point[0], point[2] - 2)
+    glColor4f(0.5, 0.2, 0.2, 1)
+    gluSphere(sphere, 0.1, 32, 16)
+
+    glBegin(GL_LINES)
+    # glColor4f(0.5, 0.2, 0.2, 0.3)
+    glVertex3f(0, 0, 0)
+    glVertex3f(0, 0, -2 - point[2])
+    glEnd()
+
+
+def target_point(point):
+    """Draw the target point as a bigger yellow dot with a stick, to better visualize its projection on the grid.
+
+    Args:
+        point: tuple x,y,z position
+    """
+    sphere = gluNewQuadric()
+    glTranslatef(-point[1], point[0], point[2] - 2)
+    glColor4f(0.6, 0.6, 0, 0.7)
+    gluSphere(sphere, 0.2, 32, 16)
+
+    glBegin(GL_LINES)
+    glColor4f(0.7, 0.7, 0, 0.3)
+    glVertex3f(0, 0, 0)
+    glVertex3f(0, 0, -2 - point[2])
+    glEnd()
diff --git a/momadm_benchmarks/envs/crazyrl/surround/__init__.py b/momadm_benchmarks/envs/crazyrl/surround/__init__.py
new file mode 100644
index 00000000..3fe7a540
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/surround/__init__.py
@@ -0,0 +1 @@
+"""Surround environment for multi-agent reinforcement learning."""
diff --git a/momadm_benchmarks/envs/crazyrl/surround/mosurround_v0.py b/momadm_benchmarks/envs/crazyrl/surround/mosurround_v0.py
new file mode 100644
index 00000000..74c9208a
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/surround/mosurround_v0.py
@@ -0,0 +1,5 @@
+"""CrazyRL/Surround environment for MOMARL."""
+from momadm_benchmarks.envs.crazyrl.surround.surround import env, parallel_env, raw_env
+
+
+__all__ = ["env", "parallel_env", "raw_env"]
diff --git a/momadm_benchmarks/envs/crazyrl/surround/surround.py b/momadm_benchmarks/envs/crazyrl/surround/surround.py
new file mode 100644
index 00000000..6b21aa5d
--- /dev/null
+++ b/momadm_benchmarks/envs/crazyrl/surround/surround.py
@@ -0,0 +1,84 @@
+"""Surround environment for Crazyflie 2. Each agent is supposed to learn to surround a common target point."""
+from typing_extensions import override
+
+import numpy as np
+from pettingzoo.utils.wrappers import AssertOutOfBoundsWrapper
+
+from momadm_benchmarks.envs.crazyrl.crazyRL_base import FPS, CrazyRLBaseParallelEnv
+from momadm_benchmarks.utils.conversions import mo_parallel_to_aec
+
+
+def env(*args, **kwargs):
+    """Returns the wrapped environment in `AEC` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A wrapped AEC env.
+    """
+    env = raw_env(*args, **kwargs)
+    env = mo_parallel_to_aec(env)
+    env = AssertOutOfBoundsWrapper(env)
+    return env
+
+
+def parallel_env(*args, **kwargs):
+    """Returns the wrapped env in `parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A parallel env.
+    """
+    env = raw_env(*args, **kwargs)
+    return env
+
+
+def raw_env(*args, **kwargs):
+    """Returns the environment in `Parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to create the `Surround` environment.
+
+    Returns:
+        A raw env.
+ """ + return Surround(*args, **kwargs) + + +class Surround(CrazyRLBaseParallelEnv): + """A Parallel Environment where drone learn how to surround a target point.""" + + metadata = {"render_modes": ["human"], "name": "mosurround_v0", "is_parallelizable": True, "render_fps": FPS} + + @override + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @override + def _transition_state(self, actions): + target_point_action = dict() + state = self.agent_location + + for agent in self.agents: + # Actions are clipped to stay in the map and scaled to do max 20cm in one step + target_point_action[agent] = np.clip( + state[agent] + actions[agent] * 0.2, [-self.size, -self.size, 0], [self.size, self.size, 3] + ) + + return target_point_action + + +if __name__ == "__main__": + prll_env = Surround(render_mode="human") + + observations, infos = prll_env.reset() + + while prll_env.agents: + actions = { + agent: prll_env.action_space(agent).sample() for agent in prll_env.agents + } # this is where you would insert your policy + observations, rewards, terminations, truncations, infos = prll_env.step(actions) + prll_env.render() diff --git a/momadm_benchmarks/utils/all_modules.py b/momadm_benchmarks/utils/all_modules.py new file mode 100644 index 00000000..b9cb8292 --- /dev/null +++ b/momadm_benchmarks/utils/all_modules.py @@ -0,0 +1,21 @@ +"""Environment registry. + +Used for: +- testing +- rendering GIF images +""" + +from momadm_benchmarks.envs.beach_domain import mobeach_v0 +from momadm_benchmarks.envs.crazyrl.catch import mocatch_v0 +from momadm_benchmarks.envs.crazyrl.escort import moescort_v0 +from momadm_benchmarks.envs.crazyrl.surround import mosurround_v0 +from momadm_benchmarks.envs.multiwalker import momultiwalker_v0 + + +all_environments = { + "mobeach_v0": mobeach_v0, + "momultiwalker_v0": momultiwalker_v0, + "mocatch_v0": mocatch_v0, + "mosurround_v0": mosurround_v0, + "moescort_v0": moescort_v0, +} diff --git a/momadm_benchmarks/utils/generate_gif_image.py b/momadm_benchmarks/utils/generate_gif_image.py new file mode 100644 index 00000000..89e0a650 --- /dev/null +++ b/momadm_benchmarks/utils/generate_gif_image.py @@ -0,0 +1,47 @@ +"""Requirement for users. + +- The package `ffmpeg` is required to be installed on host system for the PNG[] -> GIF process. 
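+- Usage (assumed invocation, not stated in the original module): run `python -m momadm_benchmarks.utils.generate_gif_image <env_name>` from the repository root with one of the keys of `all_environments` (e.g. `mocatch_v0`), or `all` to render every registered environment; the script only reads the name from `sys.argv[1]`.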
+""" + +import sys + +from PIL import Image + +from momadm_benchmarks.utils.all_modules import all_environments + + +def generate_gif(nameline, module): + """Generates a GIF of a full environment cycle.""" + env = module.env(render_mode="rgb_array") + env.reset() + imgs = [] + for _ in range(100): + for agent in env.agent_iter(env.num_agents): # step through every agent once with observe=True + obs, rew, termination, truncation, info = env.last() + if termination or truncation: + action = None + else: + action = env.action_spaces[agent].sample() + env.step(action) + + # save rgb_array data + ndarray = env.render() + im = Image.fromarray(ndarray) + imgs.append(im) + + env.close() + + # render gif from data + imgs[0].save(f"{nameline}.gif", save_all=True, append_images=imgs[1:], duration=40, loop=0) + + +if __name__ == "__main__": + name = sys.argv[1] + if name == "all": + for name, module in all_environments.items(): + nameline = name.replace("/", "_") + generate_gif(nameline, module) + else: + module = all_environments[name] + nameline = name.replace("/", "_") + generate_gif(nameline, module) diff --git a/tests/all_modules.py b/tests/all_modules.py deleted file mode 100644 index 63aa02ab..00000000 --- a/tests/all_modules.py +++ /dev/null @@ -1,8 +0,0 @@ -from momadm_benchmarks.envs.beach_domain import mobeach_v0 -from momadm_benchmarks.envs.multiwalker import momultiwalker_v0 - - -all_environments = { - "mobeach_v0": mobeach_v0, - "momultiwalker_v0": momultiwalker_v0, -} diff --git a/tests/test_envs.py b/tests/test_envs.py index d7671cea..69fcc04b 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -5,8 +5,7 @@ from pettingzoo.test import parallel_api_test, seed_test from momadm_benchmarks.test.api_test import api_test - -from .all_modules import all_environments +from momadm_benchmarks.utils.all_modules import all_environments @pytest.mark.parametrize(("name", "env_module"), list(all_environments.items()))