Updated remaining_trades to fractional_remaining_trades and added support for more supplementary information to be output by the environment in the future

samre12 committed Jun 1, 2018
1 parent 14850a0 commit 96cf28b
Showing 5 changed files with 12 additions and 10 deletions.
8 changes: 5 additions & 3 deletions README.md
@@ -56,7 +56,7 @@ env = gym.make('RealizedPnLEnv-v0')

- Use `env.reset()` to start a new random episode.

- - returns history of observations prior to the starting point of the episode. Look [Parameters](#params) for more information.
+ - returns the history of observations prior to the starting point of the episode, together with the fractional remaining trades, which is `[1.0]` at the start of the episode. See [Parameters](#params) for more information.

```python
state = env.reset() # use state to make initial prediction
```
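A minimal usage sketch of the new `reset()` contract, assuming `reset()` passes through the `(history, np.array([1.0]))` tuple that `_new_random_episode` returns in the cryptoenv.py hunk further down:

```python
import gym
import gym_cryptotrading  # assumed import path; registers the environments on import

env = gym.make('RealizedPnLEnv-v0')

# reset() now yields the observation history plus a supplementary array;
# its single entry, the fractional remaining trades, starts at 1.0.
history, supplementary = env.reset()
assert supplementary[0] == 1.0
```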

@@ -66,7 +66,7 @@ env = gym.make('RealizedPnLEnv-v0')

- Use `env.step(action)` to take one step in the environment.

- - returns `(observation, reward, is_terminal, remaining_trades)` in respective order
+ - returns `(observation, reward, is_terminal, fractional_remaining_trades)`, in that order

```python
observation, reward, is_terminal, remaining_trades = env.step(action)
```

@@ -215,4 +215,6 @@ Listing changes from [**`b9af98db728230569a18d54dcfa87f7337930314`**](https://gi

### Breaking Changes

- Environment with **Unrealized PnL** reward function is now built using `env = gym.make('UnrealizedPnLEnv-v0')` rather than `env = gym.make('CryptoTrading-v0')`

+ - Instead of `remaining_trades`, `env.step(action)` now outputs `np.array([fractional_remaining_trades])`, so that more supplementary information (such as **technical indicators**) can be added in the future
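How a loop might consume the new signature (a sketch, not repository code; the random policy via `env.action_space.sample()` is an assumption):

```python
import gym
import gym_cryptotrading  # assumed import path; registers the environments

env = gym.make('RealizedPnLEnv-v0')
history, supplementary = env.reset()
is_terminal = False
while not is_terminal:
    action = env.action_space.sample()  # assumed standard gym sampling, standing in for a policy
    observation, reward, is_terminal, supplementary = env.step(action)
    # supplementary is np.array([fractional_remaining_trades]); index into it
    # rather than treating it as the integer count the old API returned.
    fraction_left = supplementary[0]
```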
2 changes: 1 addition & 1 deletion gym_cryptotrading/envs/cryptoenv.py
@@ -63,7 +63,7 @@ def _new_random_episode(self):

        map(self.logger.debug, message_list)

-        return self.historical_prices[self.current - self.history_length:self.current]
+        return self.historical_prices[self.current - self.history_length:self.current], np.array([1.0])


def _reset_params(self):
4 changes: 2 additions & 2 deletions gym_cryptotrading/envs/realizedPnL.py
@@ -48,6 +48,6 @@ def step(self, action):
        self.timesteps = self.timesteps + 1
        if self.timesteps is not self.horizon:
            self.current = self.current + 1
-            return state, reward, False, float(self.horizon - self.timesteps)
+            return state, reward, False, np.array([float(self.horizon - self.timesteps) / self.horizon])
        else:
-            return state, reward, True, 0.0
+            return state, reward, True, np.array([0.0])
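A standalone sketch of the normalization these hunks introduce: with a horizon of 5, the supplementary value decays from 0.8 after the first step to 0.0 at the terminal step. (The sketch compares with `!=` where the repository uses `is not`, which only works for small ints under CPython's int caching.)

```python
horizon = 5
for timesteps in range(1, horizon + 1):
    if timesteps != horizon:
        fraction = float(horizon - timesteps) / horizon
    else:
        fraction = 0.0
    print(timesteps, fraction)  # 1 0.8, 2 0.6, 3 0.4, 4 0.2, 5 0.0
```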
4 changes: 2 additions & 2 deletions gym_cryptotrading/envs/unrealizedPnL.py
@@ -42,7 +42,7 @@ def step(self, action):
        self.timesteps = self.timesteps + 1
        if self.timesteps is not self.horizon:
            self.current = self.current + 1
-            return state, reward, False, float(self.horizon - self.timesteps)
+            return state, reward, False, np.array([float(self.horizon - self.timesteps) / self.horizon])
        else:
-            return state, reward, True, 0.0
+            return state, reward, True, np.array([0.0])

4 changes: 2 additions & 2 deletions gym_cryptotrading/envs/weightedPnL.py
@@ -93,6 +93,6 @@ def step(self, action):
        self.timesteps = self.timesteps + 1
        if self.timesteps is not self.horizon:
            self.current = self.current + 1
-            return state, reward, False, float(self.horizon - self.timesteps)
+            return state, reward, False, np.array([float(self.horizon - self.timesteps) / self.horizon])
        else:
-            return state, reward, True, 0.0
+            return state, reward, True, np.array([0.0])
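The commit message leaves room for more supplementary outputs later; a purely hypothetical extension (none of this is in the repository) could concatenate indicator values onto the same array:

```python
import numpy as np

def build_supplementary(fraction_left, indicators):
    # Hypothetical helper: flatten the fractional remaining trades and any
    # technical-indicator values into the single array step() would return.
    return np.concatenate(([fraction_left], np.asarray(indicators, dtype=float)))

sup = build_supplementary(0.6, [52.3, -0.004])  # e.g. an RSI value and a MACD delta
assert sup[0] == 0.6  # the fraction stays at index 0
```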
