instadeepai · clement-bonnet · Mar 21, 2024 · Jan 19, 2024 · Jan 20, 2024 · Jan 20, 2024
diff --git a/README.md b/README.md
@@ -159,7 +159,7 @@ state, timestep = jax.jit(env.reset)(key)
 env.render(state)
 
 # Interact with the (jit-able) environment
-action = env.action_spec().generate_value()          # Action selection (dummy value here)
+action = env.action_spec.generate_value()            # Action selection (dummy value here)
 state, timestep = jax.jit(env.step)(state, action)   # Take a step and observe the next state and time step
 ```
 

diff --git a/docs/guides/advanced_usage.md b/docs/guides/advanced_usage.md
@@ -16,7 +16,7 @@ env = AutoResetWrapper(env)     # Automatically reset the environment when an ep
 
 batch_size = 7
 rollout_length = 5
-num_actions = env.action_spec().num_values
+num_actions = env.action_spec.num_values
 
 random_key = jax.random.PRNGKey(0)
 key1, key2 = jax.random.split(random_key)

diff --git a/docs/guides/wrappers.md b/docs/guides/wrappers.md
@@ -13,7 +13,7 @@ env = jumanji.make("Snake-6x6-v0")
 dm_env = jumanji.wrappers.JumanjiToDMEnvWrapper(env)
 
 timestep = dm_env.reset()
-action = dm_env.action_spec().generate_value()
+action = dm_env.action_spec.generate_value()
 next_timestep = dm_env.step(action)
 ...
 ```
@@ -52,7 +52,7 @@ key = jax.random.PRNGKey(0)
 state, timestep = env.reset(key)
 print("New episode")
 for i in range(100):
-    action = env.action_spec().generate_value()  # Returns jnp.array(0) when using Snake.
+    action = env.action_spec.generate_value()  # Returns jnp.array(0) when using Snake.
     state, timestep = env.step(state, action)
     if timestep.first():
         print("New episode")

diff --git a/jumanji/env.py b/jumanji/env.py
@@ -33,9 +33,10 @@ class StateProtocol(Protocol):
 
 
 State = TypeVar("State", bound="StateProtocol")
+ActionSpec = TypeVar("ActionSpec", bound=specs.Array)
 
 
-class Environment(abc.ABC, Generic[State]):
+class Environment(abc.ABC, Generic[State, ActionSpec]):
     """Environment written in Jax that differs from the gym API to make the step and
     reset functions jittable. The state contains all the dynamics and data needed to step
     the environment, no computation stored in attributes of self.
@@ -45,6 +46,13 @@ class Environment(abc.ABC, Generic[State]):
     def __repr__(self) -> str:
         return "Environment."
 
+    def __init__(self) -> None:
+        """Build and store the observation, action, reward and discount specs."""
+        self._observation_spec = self._make_observation_spec()
+        self._action_spec = self._make_action_spec()
+        self._reward_spec = self._make_reward_spec()
+        self._discount_spec = self._make_discount_spec()
+
     @abc.abstractmethod
     def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep]:
         """Resets the environment to an initial state.
@@ -70,34 +78,68 @@ def step(self, state: State, action: chex.Array) -> Tuple[State, TimeStep]:
             timestep: TimeStep object corresponding the timestep returned by the environment,
         """
 
-    @abc.abstractmethod
+    @property
     def observation_spec(self) -> specs.Spec:
         """Returns the observation spec.
 
         Returns:
             observation_spec: a NestedSpec tree of spec.
         """
+        return self._observation_spec
 
     @abc.abstractmethod
-    def action_spec(self) -> specs.Spec:
+    def _make_observation_spec(self) -> specs.Spec:
+        """Returns new observation spec.
+
+        Returns:
+            observation_spec: a NestedSpec tree of spec.
+        """
+
+    @property
+    def action_spec(self) -> ActionSpec:
         """Returns the action spec.
 
         Returns:
             action_spec: a NestedSpec tree of spec.
         """
+        return self._action_spec
+
+    @abc.abstractmethod
+    def _make_action_spec(self) -> ActionSpec:
+        """Returns new action spec.
 
+        Returns:
+            action_spec: a `specs.Array` spec (or a subclass of it).
+        """
+
+    @property
     def reward_spec(self) -> specs.Array:
-        """Describes the reward returned by the environment. By default, this is assumed to be a
-        single float.
+        """Returns the reward spec. By default, this is assumed to be a single float.
+
+        Returns:
+            reward_spec: a `specs.Array` spec.
+        """
+        return self._reward_spec
+
+    def _make_reward_spec(self) -> specs.Array:
+        """Returns new reward spec. By default, this is assumed to be a single float.
 
         Returns:
             reward_spec: a `specs.Array` spec.
         """
         return specs.Array(shape=(), dtype=float, name="reward")
 
+    @property
     def discount_spec(self) -> specs.BoundedArray:
-        """Describes the discount returned by the environment. By default, this is assumed to be a
-        single float between 0 and 1.
+        """Returns the discount spec. By default, this is assumed to be a single float between 0 and 1.
+
+        Returns:
+            discount_spec: a `specs.BoundedArray` spec.
+        """
+        return self._discount_spec
+
+    def _make_discount_spec(self) -> specs.BoundedArray:
+        """Returns new discount spec. By default, this is assumed to be a single float between 0 and 1.
 
         Returns:
             discount_spec: a `specs.BoundedArray` spec.

diff --git a/jumanji/environments/logic/game_2048/env.py b/jumanji/environments/logic/game_2048/env.py
@@ -29,7 +29,7 @@
 from jumanji.viewer import Viewer
 
 
-class Game2048(Environment[State]):
+class Game2048(Environment[State, specs.DiscreteArray]):
     """Environment for the game 2048. The game consists of a board of size board_size x board_size
     (4x4 by default) in which the player can take actions to move the tiles on the board up, down,
     left, or right. The goal of the game is to combine tiles with the same number to create a tile
@@ -69,7 +69,7 @@ class Game2048(Environment[State]):
     key = jax.random.key(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -85,6 +85,7 @@ def __init__(
             viewer: `Viewer` used for rendering. Defaults to `Game2048Viewer`.
         """
         self.board_size = board_size
+        super().__init__()
 
         # Create viewer used for rendering
         self._viewer = viewer or Game2048Viewer("2048", board_size)
@@ -97,7 +98,7 @@ def __repr__(self) -> str:
         """
         return f"2048 Game(board_size={self.board_size})"
 
-    def observation_spec(self) -> specs.Spec[Observation]:
+    def _make_observation_spec(self) -> specs.Spec[Observation]:
         """Specifications of the observation of the `Game2048` environment.
 
         Returns:
@@ -122,8 +123,8 @@ def observation_spec(self) -> specs.Spec[Observation]:
             ),
         )
 
-    def action_spec(self) -> specs.DiscreteArray:
-        """Returns the action spec.
+    def _make_action_spec(self) -> specs.DiscreteArray:
+        """Returns new action spec.
 
         4 actions: [0, 1, 2, 3] -> [Up, Right, Down, Left].
 

diff --git a/jumanji/environments/logic/game_2048/env_test.py b/jumanji/environments/logic/game_2048/env_test.py
@@ -19,7 +19,10 @@
 
 from jumanji.environments.logic.game_2048.env import Game2048
 from jumanji.environments.logic.game_2048.types import Board, State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import TimeStep
 
@@ -154,3 +157,8 @@ def test_game_2048__get_action_mask(game_2048: Game2048, board: Board) -> None:
 def test_game_2048__does_not_smoke(game_2048: Game2048) -> None:
     """Test that we can run an episode without any errors."""
     check_env_does_not_smoke(game_2048)
+
+
+def test_game_2048__specs_does_not_smoke(game_2048: Game2048) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(game_2048)
diff --git a/jumanji/environments/logic/graph_coloring/env.py b/jumanji/environments/logic/graph_coloring/env.py
@@ -33,7 +33,7 @@
 from jumanji.viewer import Viewer
 
 
-class GraphColoring(Environment[State]):
+class GraphColoring(Environment[State, specs.DiscreteArray]):
     """Environment for the GraphColoring problem.
     The problem is a combinatorial optimization task where the goal is
       to assign a color to each vertex of a graph
@@ -76,7 +76,7 @@ class GraphColoring(Environment[State]):
     key = jax.random.key(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -100,6 +100,7 @@ def __init__(
             num_nodes=20, edge_probability=0.8
         )
         self.num_nodes = self.generator.num_nodes
+        super().__init__()
 
         # Create viewer used for rendering
         self._env_viewer = viewer or GraphColoringViewer(name="GraphColoring")
@@ -206,8 +207,8 @@ def step(
         )
         return next_state, timestep
 
-    def observation_spec(self) -> specs.Spec[Observation]:
-        """Returns the observation spec.
+    def _make_observation_spec(self) -> specs.Spec[Observation]:
+        """Returns new observation spec.
 
         Returns:
             Spec for the `Observation` whose fields are:
@@ -253,7 +254,7 @@ def observation_spec(self) -> specs.Spec[Observation]:
             ),
         )
 
-    def action_spec(self) -> specs.DiscreteArray:
+    def _make_action_spec(self) -> specs.DiscreteArray:
         """Specification of the action for the `GraphColoring` environment.
 
         Returns:

diff --git a/jumanji/environments/logic/graph_coloring/env_test.py b/jumanji/environments/logic/graph_coloring/env_test.py
@@ -18,7 +18,10 @@
 
 from jumanji.environments.logic.graph_coloring import GraphColoring
 from jumanji.environments.logic.graph_coloring.types import State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import TimeStep
 
@@ -90,3 +93,8 @@ def test_graph_coloring_get_action_mask(graph_coloring: GraphColoring) -> None:
 def test_graph_coloring_does_not_smoke(graph_coloring: GraphColoring) -> None:
     """Test that we can run an episode without any errors."""
     check_env_does_not_smoke(graph_coloring)
+
+
+def test_graph_coloring_specs_does_not_smoke(graph_coloring: GraphColoring) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(graph_coloring)
diff --git a/jumanji/environments/logic/minesweeper/env.py b/jumanji/environments/logic/minesweeper/env.py
@@ -36,7 +36,7 @@
 from jumanji.viewer import Viewer
 
 
-class Minesweeper(Environment[State]):
+class Minesweeper(Environment[State, specs.MultiDiscreteArray]):
     """A JAX implementation of the minesweeper game.
 
     - observation: `Observation`
@@ -81,7 +81,7 @@ class Minesweeper(Environment[State]):
     key = jax.random.key(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -127,6 +127,7 @@ def __init__(
         self.num_rows = self.generator.num_rows
         self.num_cols = self.generator.num_cols
         self.num_mines = self.generator.num_mines
+        super().__init__()
         self._viewer = viewer or MinesweeperViewer(
             num_rows=self.num_rows, num_cols=self.num_cols
         )
@@ -182,7 +183,7 @@ def step(
         )
         return next_state, next_timestep
 
-    def observation_spec(self) -> specs.Spec[Observation]:
+    def _make_observation_spec(self) -> specs.Spec[Observation]:
         """Specifications of the observation of the `Minesweeper` environment.
 
         Returns:
@@ -229,8 +230,8 @@ def observation_spec(self) -> specs.Spec[Observation]:
             step_count=step_count,
         )
 
-    def action_spec(self) -> specs.MultiDiscreteArray:
-        """Returns the action spec.
+    def _make_action_spec(self) -> specs.MultiDiscreteArray:
+        """Returns new action spec.
         An action consists of the height and width of the square to be explored.
 
         Returns:

diff --git a/jumanji/environments/logic/minesweeper/env_test.py b/jumanji/environments/logic/minesweeper/env_test.py
@@ -24,7 +24,10 @@
 
 from jumanji.environments.logic.minesweeper.env import Minesweeper
 from jumanji.environments.logic.minesweeper.types import State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import StepType, TimeStep
 
@@ -123,7 +126,7 @@ def test_minesweeper__step(minesweeper_env: Minesweeper) -> None:
     key = jax.random.PRNGKey(0)
     state, timestep = jax.jit(minesweeper_env.reset)(key)
     # For this board, this action will be a non-mined square
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     next_state, next_timestep = step_fn(state, action)
 
     # Check that the state has changed
@@ -154,6 +157,11 @@ def test_minesweeper__does_not_smoke(minesweeper_env: Minesweeper) -> None:
     check_env_does_not_smoke(env=minesweeper_env)
 
 
+def test_minesweeper__specs_does_not_smoke(minesweeper_env: Minesweeper) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(minesweeper_env)
+
+
 def test_minesweeper__render(
     monkeypatch: pytest.MonkeyPatch, minesweeper_env: Minesweeper
 ) -> None:
@@ -162,7 +170,7 @@ def test_minesweeper__render(
     state, timestep = jax.jit(minesweeper_env.reset)(jax.random.PRNGKey(0))
     minesweeper_env.render(state)
     minesweeper_env.close()
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     state, timestep = jax.jit(minesweeper_env.step)(state, action)
     minesweeper_env.render(state)
     minesweeper_env.close()
@@ -171,7 +179,7 @@ def test_minesweeper__render(
 def test_minesweeper__done_invalid_action(minesweeper_env: Minesweeper) -> None:
     """Test that the strict done signal is sent correctly"""
     # Note that this action corresponds to not stepping on a mine
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     *_, episode_length = play_and_get_episode_stats(
         env=minesweeper_env, actions=[action for _ in range(10)], time_limit=10
     )