From cd0011363989ebcca6bbec0fa468954fb9b50310 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 25 May 2021 14:56:30 +0200 Subject: [PATCH 01/24] Added range getter for observations and normalization with that ranges in wrapper --- energym/utils/callbacks.py | 3 -- energym/utils/common.py | 72 ++++++++++++++++++++++++++++++++++++++ energym/utils/wrappers.py | 19 ++++++++-- examples/mlflow_A2C.py | 7 ++-- 4 files changed, 94 insertions(+), 7 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index c052d24892..d3d1fd02b2 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -30,9 +30,6 @@ def _on_step(self) -> bool: info = self.locals['infos'][-1] obs_dict = dict(zip(self.training_env.get_attr('variables')[ 0]['observation'], self.locals['new_obs'][0])) - # obs_dict['day'] = info['day'] - # obs_dict['month'] = info['month'] - # obs_dict['hour'] = info['hour'] for key in obs_dict: self.logger.record('observation/'+key, obs_dict[key]) diff --git a/energym/utils/common.py b/energym/utils/common.py index c4ec258037..fa2e6214b7 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -10,6 +10,41 @@ from datetime import datetime, timedelta +# NORMALIZATION RANGES +RANGES_5ZONE = {'Facility Total HVAC Electric Demand Power (Whole Building)': [173.6583692738386, + 32595.57259261767], + 'People Air Temperature (SPACE1-1 PEOPLE 1)': [0.0, 30.00826655379267], + 'Site Diffuse Solar Radiation Rate per Area (Environment)': [0.0, 588.0], + 'Site Direct Solar Radiation Rate per Area (Environment)': [0.0, 1033.0], + 'Site Outdoor Air Drybulb Temperature (Environment)': [-31.05437255409474, + 60.72839186915495], + 'Site Outdoor Air Relative Humidity (Environment)': [3.0, 100.0], + 'Site Wind Direction (Environment)': [0.0, 357.5], + 'Site Wind Speed (Environment)': [0.0, 23.1], + 'Space1-ClgSetP-RL': [21.0, 30.0], + 'Space1-HtgSetP-RL': [15.0, 22.49999], + 'Zone Air Relative Humidity (SPACE1-1)': [3.287277410867238, + 87.60662171287048], + 'Zone Air Temperature (SPACE1-1)': [15.22565264653451, 30.00826655379267], + 'Zone People Occupant Count (SPACE1-1)': [0.0, 11.0], + 'Zone Thermal Comfort Clothing Value (SPACE1-1 PEOPLE 1)': [0.0, 1.0], + 'Zone Thermal Comfort Fanger Model PPD (SPACE1-1 PEOPLE 1)': [0.0, + 98.37141259444684], + 'Zone Thermal Comfort Mean Radiant Temperature (SPACE1-1 PEOPLE 1)': [0.0, + 35.98853496778508], + 'Zone Thermostat Cooling Setpoint Temperature (SPACE1-1)': [21.0, 30.0], + 'Zone Thermostat Heating Setpoint Temperature (SPACE1-1)': [15.0, + 22.49999046325684], + 'comfort_penalty': [-6.508266553792669, -0.0], + 'day': [1, 31], + 'done': [False, True], + 'hour': [0, 23], + 'month': [1, 12], + 'reward': [-3.550779087370951, -0.0086829184636919], + 'time (seconds)': [0, 31536000], + 'timestep': [0, 35040], + 'total_power_no_units': [-3.259557259261767, -0.0173658369273838]} + def get_delta_seconds(year, st_mon, st_day, end_mon, end_day): """Returns the delta seconds between `year:st_mon:st_day:0:0:0` and @@ -184,6 +219,43 @@ def create_variable_weather(weather_data, original_epw_file, columns: list = ['d return filename +def ranges_extractor(output_path, last_result=None): + """Given a path with simulations outputs, this function is used to extract max and min absolute valors of all episodes in each variable. If a dict ranges is given, will be updated + + Args: + output_path (str): path with simulations directories (Eplus-env-*). + last_result (dict): Last ranges dict to be updated. 
This will be created if it is not given. + + Returns: + dict: list min,max of each variable as a key. + """ + if last_result is not None: + result = last_result + else: + result = {} + + content = os.listdir(output_path) + for simulation in content: + if os.path.isdir(output_path+'/'+simulation) and simulation.startswith('Eplus-env'): + simulation_content = os.listdir(output_path+'/'+simulation) + for episode_dir in simulation_content: + if os.path.isdir(output_path+'/'+simulation+'/'+episode_dir): + monitor_path = output_path+'/'+simulation+'/'+episode_dir+'/monitor.csv' + data = pd.read_csv(monitor_path) + + if len(result) == 0: + for column in data: + # variable : [min,max] + result[column] = [np.inf, -np.inf] + + for column in data: + if np.min(data[column]) < result[column][0]: + result[column][0] = np.min(data[column]) + if np.max(data[column]) > result[column][1]: + result[column][1] = np.max(data[column]) + return result + + class Logger(): def getLogger(self, name, level, formatter): logger = logging.getLogger(name) diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index 53e8ae8ef7..95d669aa1f 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -4,6 +4,7 @@ import gym from collections import deque +from energym.utils.common import RANGES_5ZONE class NormalizeObservation(gym.ObservationWrapper): @@ -17,7 +18,7 @@ def __init__(self, env): super(NormalizeObservation, self).__init__(env) def observation(self, obs): - """Applies *tanh* to observation. + """Applies normalization to observation. Args: obs (object): Original observation. @@ -25,7 +26,21 @@ def observation(self, obs): Returns: object: Normalized observation. """ - return np.tanh(obs) + # Don't have variables name, we need to get it and add manually news + keys = self.env.variables["observation"] + keys.append('day') + keys.append('month') + keys.append('hour') + obs_dict = dict(zip(keys, obs)) + + for key in obs_dict: + # normalization + value = obs_dict[key] + value = (value-RANGES_5ZONE[key][0]) / \ + (RANGES_5ZONE[key][1]-RANGES_5ZONE[key][0]) + obs_dict[key] = value + # Return obs values in the SAME ORDER than obs argument. 
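As an aside, the scaling applied above is plain min-max normalization against precomputed [min, max] bounds such as RANGES_5ZONE. A minimal standalone sketch of the same idea, with an illustrative two-variable ranges dict and a hypothetical helper name:

import numpy as np

# Illustrative subset of a ranges dict: {variable_name: [min, max]}
EXAMPLE_RANGES = {
    'Site Outdoor Air Drybulb Temperature (Environment)': [-31.05, 60.73],
    'Zone Air Temperature (SPACE1-1)': [15.23, 30.01],
}

def min_max_normalize(values, variable_names, ranges):
    """Scale each value to [0, 1] using the recorded [min, max] of its variable."""
    scaled = []
    for name, value in zip(variable_names, values):
        low, high = ranges[name]
        x = (value - low) / (high - low)
        # Clip values that fall outside the ranges observed during data collection
        scaled.append(min(max(x, 0.0), 1.0))
    return np.array(scaled)

print(min_max_normalize([22.5, 24.0], list(EXAMPLE_RANGES.keys()), EXAMPLE_RANGES))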
+ return np.array(list(obs_dict.values())) class MultiObsWrapper(gym.Wrapper): diff --git a/examples/mlflow_A2C.py b/examples/mlflow_A2C.py index 63211f7d71..4c21d6efff 100644 --- a/examples/mlflow_A2C.py +++ b/examples/mlflow_A2C.py @@ -13,6 +13,8 @@ import numpy as np from energym.utils.callbacks import LoggerCallback, LoggerEvalCallback +from energym.utils.wrappers import NormalizeObservation + from stable_baselines3 import A2C from stable_baselines3.common.callbacks import EvalCallback, BaseCallback, CallbackList from stable_baselines3.common.vec_env import DummyVecEnv @@ -51,6 +53,7 @@ mlflow.log_param('rms_prop_eps', args.rms_prop_eps) env = gym.make(environment) + env = NormalizeObservation(env) #### TRAINING #### @@ -74,10 +77,10 @@ env.env_method('activate_logger') # Callbacks - freq = 1 # evaluate every 5 episodes + freq = 8 # evaluate every N episodes eval_callback = LoggerEvalCallback(env, best_model_save_path='./best_models/' + name + '/', log_path='./best_models/' + name + '/', eval_freq=n_timesteps_episode * freq, - deterministic=True, render=False, n_eval_episodes=5) + deterministic=True, render=False, n_eval_episodes=2) log_callback = LoggerCallback() callback = CallbackList([log_callback, eval_callback]) From 983fd033304c3c24ea411ba1f632aed2b8c190a7 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 26 May 2021 11:15:57 +0200 Subject: [PATCH 02/24] New observation normalize: Added normalized observation record to tensorboard (independently to original observation values) --- energym/utils/callbacks.py | 20 ++++++++++++++++++-- energym/utils/common.py | 2 +- energym/utils/wrappers.py | 22 ++++++++++++++++++---- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index d3d1fd02b2..f611f40a71 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -2,6 +2,7 @@ import numpy as np import gym import os +from energym.utils.wrappers import NormalizeObservation import warnings from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -30,8 +31,23 @@ def _on_step(self) -> bool: info = self.locals['infos'][-1] obs_dict = dict(zip(self.training_env.get_attr('variables')[ 0]['observation'], self.locals['new_obs'][0])) - for key in obs_dict: - self.logger.record('observation/'+key, obs_dict[key]) + + if self.training_env.env_is_wrapped(wrapper_class=NormalizeObservation)[0]: + for key in obs_dict: + self.logger.record( + 'normalized_observation/'+key, obs_dict[key]) + # unwrapped observation (DummyVec so we need specify index 0) + obs = self.training_env.env_method('get_unwrapped_obs')[0] + obs_dict = dict(zip(self.training_env.get_attr('variables')[ + 0]['observation'], obs)) + for key in obs_dict: + self.logger.record( + 'observation/'+key, obs_dict[key]) + else: + # Only original observation + for key in obs_dict: + self.logger.record( + 'observation/'+key, obs_dict[key]) # Store episode data self.ep_rewards.append(self.locals['rewards'][-1]) diff --git a/energym/utils/common.py b/energym/utils/common.py index fa2e6214b7..17c5ded332 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -219,7 +219,7 @@ def create_variable_weather(weather_data, original_epw_file, columns: list = ['d return filename -def ranges_extractor(output_path, last_result=None): +def ranges_getter(output_path, last_result=None): """Given a path with simulations outputs, this function is used to extract max and min absolute valors of all episodes in each variable. 
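Combined with the ranges argument that this patch adds to NormalizeObservation (see the wrappers.py hunk further below), the intended workflow is roughly the following sketch; the output directory is illustrative and must already contain Eplus-env-* results from earlier runs:

import gym
import energym  # registers the Eplus-* environments
from energym.utils.common import ranges_getter
from energym.utils.wrappers import NormalizeObservation

# Build per-variable [min, max] bounds from previous simulation outputs
ranges = ranges_getter(output_path='./previous_runs')

# Normalize a fresh environment with those bounds instead of the default RANGES_5ZONE
env = NormalizeObservation(gym.make('Eplus-demo-v1'), ranges=ranges)
obs = env.reset()  # observation values now lie in [0, 1]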
If a dict ranges is given, will be updated Args: diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index 95d669aa1f..dca63d9086 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -9,13 +9,16 @@ class NormalizeObservation(gym.ObservationWrapper): - def __init__(self, env): - """Observations normalized to range [-1, 1]. + def __init__(self, env, ranges=RANGES_5ZONE): + """Observations normalized to range [0, 1]. Args: env (object): Original Gym environment. + ranges: Observation variables ranges to apply normalization (rely on environment) """ super(NormalizeObservation, self).__init__(env) + self.unwrapped_observation = None + self.ranges = ranges def observation(self, obs): """Applies normalization to observation. @@ -26,7 +29,10 @@ def observation(self, obs): Returns: object: Normalized observation. """ + # Save original obs in class attribute + self.unwrapped_observation = obs # Don't have variables name, we need to get it and add manually news + # len(variables)!=len(obs) keys = self.env.variables["observation"] keys.append('day') keys.append('month') @@ -36,12 +42,20 @@ def observation(self, obs): for key in obs_dict: # normalization value = obs_dict[key] - value = (value-RANGES_5ZONE[key][0]) / \ - (RANGES_5ZONE[key][1]-RANGES_5ZONE[key][0]) + value = (value-self.ranges[key][0]) / \ + (self.ranges[key][1]-self.ranges[key][0]) + # If value is out + if value > 1: + value = 1 + if value < 0: + value = 0 obs_dict[key] = value # Return obs values in the SAME ORDER than obs argument. return np.array(list(obs_dict.values())) + def get_unwrapped_obs(self): + return self.unwrapped_observation + class MultiObsWrapper(gym.Wrapper): From 829a4725f73b613dc25aafdee250e95296515ecc Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 26 May 2021 11:16:46 +0200 Subject: [PATCH 03/24] Update wrapper tests to new normalization based on ranges --- tests/conftest.py | 2 +- tests/test_wrapper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6aecc13853..984de6cb25 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -96,7 +96,7 @@ def env_demo_continuous(idf_path, weather_path, variable_path, space_path): @pytest.fixture(scope='module') def env_wrapper(env_demo): - return NormalizeObservation(MultiObsWrapper(env=env_demo, n=5)) + return MultiObsWrapper(env=NormalizeObservation(env=env_demo), n=5) @pytest.fixture(scope='module') diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index faf2a85b5c..44dda80916 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -8,7 +8,7 @@ def test_env_wrappers(env_wrapper): for i in range(1): # Only need 1 episode obs = env_wrapper.reset() # This obs should be normalize --> [-1,1] - assert (obs >= -1).all() and (obs <= 1).all() + assert (obs >= 0).all() and (obs <= 1).all() done = False while not done: From 31cec352119b421da9c588020af444bb300a95be Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 26 May 2021 13:39:10 +0200 Subject: [PATCH 04/24] Improved code for wrapper and tensorboard logger callback --- energym/utils/callbacks.py | 30 ++++++++++++++---------------- energym/utils/wrappers.py | 31 ++++++++++++------------------- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index f611f40a71..28c3e8c2af 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -28,29 +28,27 @@ def _on_training_start(self): 
self.training_env.env_method('deactivate_logger') def _on_step(self) -> bool: - info = self.locals['infos'][-1] - obs_dict = dict(zip(self.training_env.get_attr('variables')[ - 0]['observation'], self.locals['new_obs'][0])) + variables = self.training_env.get_attr('variables')[0]['observation'] + # log normalized and original values if self.training_env.env_is_wrapped(wrapper_class=NormalizeObservation)[0]: - for key in obs_dict: - self.logger.record( - 'normalized_observation/'+key, obs_dict[key]) - # unwrapped observation (DummyVec so we need specify index 0) + obs_normalized = self.locals['new_obs'][0] obs = self.training_env.env_method('get_unwrapped_obs')[0] - obs_dict = dict(zip(self.training_env.get_attr('variables')[ - 0]['observation'], obs)) - for key in obs_dict: + for i, variable in enumerate(variables): + self.logger.record( + 'normalized_observation/'+variable, obs_normalized[i]) self.logger.record( - 'observation/'+key, obs_dict[key]) + 'observation/'+variable, obs[i]) + # Only original values else: - # Only original observation - for key in obs_dict: + obs = self.locals['new_obs'][0] + for i, variable in enumerate(variables): self.logger.record( - 'observation/'+key, obs_dict[key]) + 'observation/'+variable, obs[i]) # Store episode data - self.ep_rewards.append(self.locals['rewards'][-1]) + info = self.locals['infos'][0] + self.ep_rewards.append(self.locals['rewards'][0]) self.ep_powers.append(info['total_power']) self.ep_term_comfort.append(info['comfort_penalty']) self.ep_term_energy.append(info['total_power_no_units']) @@ -59,7 +57,7 @@ def _on_step(self) -> bool: self.ep_timesteps += 1 # If episode ends - if self.locals['dones'][-1]: + if self.locals['dones'][0]: self.cumulative_reward = np.sum(self.ep_rewards) self.mean_reward = np.mean(self.ep_rewards) diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index dca63d9086..328a57386f 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -30,28 +30,21 @@ def observation(self, obs): object: Normalized observation. """ # Save original obs in class attribute - self.unwrapped_observation = obs - # Don't have variables name, we need to get it and add manually news - # len(variables)!=len(obs) - keys = self.env.variables["observation"] - keys.append('day') - keys.append('month') - keys.append('hour') - obs_dict = dict(zip(keys, obs)) - - for key in obs_dict: + self.unwrapped_observation = obs.copy() + variables = self.env.variables["observation"] + + # NOTE: If you want to recor day, month and our. You should add to variables that keys + for i, variable in enumerate(variables): # normalization - value = obs_dict[key] - value = (value-self.ranges[key][0]) / \ - (self.ranges[key][1]-self.ranges[key][0]) + obs[i] = (obs[i]-self.ranges[variable][0]) / \ + (self.ranges[variable][1]-self.ranges[variable][0]) # If value is out - if value > 1: - value = 1 - if value < 0: - value = 0 - obs_dict[key] = value + if obs[i] > 1: + obs[i] = 1 + if obs[i] < 0: + obs[i] = 0 # Return obs values in the SAME ORDER than obs argument. 
- return np.array(list(obs_dict.values())) + return np.array(obs) def get_unwrapped_obs(self): return self.unwrapped_observation From 7312d3f95a0a2b2b4b6cd2e27e9ac36cd406467d Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 26 May 2021 14:56:15 +0200 Subject: [PATCH 05/24] Tensorboard action records --- energym/utils/callbacks.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index 28c3e8c2af..52fdb3a3b6 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -28,6 +28,7 @@ def _on_training_start(self): self.training_env.env_method('deactivate_logger') def _on_step(self) -> bool: + # OBSERVATION variables = self.training_env.get_attr('variables')[0]['observation'] # log normalized and original values @@ -46,6 +47,13 @@ def _on_step(self) -> bool: self.logger.record( 'observation/'+variable, obs[i]) + # ACTION + variables = self.training_env.get_attr('variables')[0]['action'] + action = self.locals['actions'][0] + for i, variable in enumerate(variables): + self.logger.record( + 'action/'+variable, action[i]) + # Store episode data info = self.locals['infos'][0] self.ep_rewards.append(self.locals['rewards'][0]) From bbca462d842a91a5c99bb9130bf0aff8bdb791fe Mon Sep 17 00:00:00 2001 From: Alejandro Date: Thu, 27 May 2021 13:18:39 +0200 Subject: [PATCH 06/24] Improved eval metrics tensorboard --- energym/utils/callbacks.py | 147 +++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 72 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index 52fdb3a3b6..d7df17f370 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -33,8 +33,8 @@ def _on_step(self) -> bool: # log normalized and original values if self.training_env.env_is_wrapped(wrapper_class=NormalizeObservation)[0]: - obs_normalized = self.locals['new_obs'][0] - obs = self.training_env.env_method('get_unwrapped_obs')[0] + obs_normalized = self.locals['new_obs'][-1] + obs = self.training_env.env_method('get_unwrapped_obs')[-1] for i, variable in enumerate(variables): self.logger.record( 'normalized_observation/'+variable, obs_normalized[i]) @@ -42,21 +42,21 @@ def _on_step(self) -> bool: 'observation/'+variable, obs[i]) # Only original values else: - obs = self.locals['new_obs'][0] + obs = self.locals['new_obs'][-1] for i, variable in enumerate(variables): self.logger.record( 'observation/'+variable, obs[i]) # ACTION variables = self.training_env.get_attr('variables')[0]['action'] - action = self.locals['actions'][0] + action = self.locals['actions'][-1] for i, variable in enumerate(variables): self.logger.record( 'action/'+variable, action[i]) # Store episode data - info = self.locals['infos'][0] - self.ep_rewards.append(self.locals['rewards'][0]) + info = self.locals['infos'][-1] + self.ep_rewards.append(self.locals['rewards'][-1]) self.ep_powers.append(info['total_power']) self.ep_term_comfort.append(info['comfort_penalty']) self.ep_term_energy.append(info['total_power_no_units']) @@ -64,15 +64,26 @@ def _on_step(self) -> bool: self.num_comfort_violation += 1 self.ep_timesteps += 1 - # If episode ends - if self.locals['dones'][0]: + # If episode ends, store summary of episode and reset + if self.locals['dones'][-1]: + # store last episode metrics + self.episode_metrics = {} + self.episode_metrics['cumulutative_reward'] = np.sum( + self.ep_rewards) + self.episode_metrics['mean_reward'] = np.mean(self.ep_rewards) + self.episode_metrics['mean_power'] = np.mean(self.ep_powers) + 
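For context, the episode summary assembled in this block reduces to sums and means over the per-step lists gathered during the episode; a toy illustration with made-up values:

import numpy as np

ep_rewards = [-0.8, -0.5, -0.3]
ep_powers = [3000.0, 2500.0, 2800.0]
episode_metrics = {
    'cumulative_reward': float(np.sum(ep_rewards)),
    'mean_reward': float(np.mean(ep_rewards)),
    'cumulative_power': float(np.sum(ep_powers)),
    'mean_power': float(np.mean(ep_powers)),
}
print(episode_metrics)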
self.episode_metrics['cumulutative_power'] = np.sum(self.ep_powers) + self.episode_metrics['mean_comfort_penalty'] = np.mean( + self.ep_term_comfort) + self.episode_metrics['cumulutative_comfort_penalty'] = np.sum( + self.ep_term_comfort) + self.episode_metrics['mean_power_penalty'] = np.mean( + self.ep_term_energy) + self.episode_metrics['cumulutative_power_penalty'] = np.sum( + self.ep_term_energy) + self.episode_metrics['comfort_violation_time(%)'] = self.num_comfort_violation / \ + self.ep_timesteps*100 - self.cumulative_reward = np.sum(self.ep_rewards) - self.mean_reward = np.mean(self.ep_rewards) - self.mean_power = np.mean(self.ep_powers) - self.mean_term_comfort = np.mean(self.ep_term_comfort) - self.mean_term_power = np.mean(self.ep_term_energy) - self.comfort_violation = self.num_comfort_violation/self.ep_timesteps*100 # reset episode info self.ep_rewards = [] self.ep_powers = [] @@ -81,18 +92,11 @@ def _on_step(self) -> bool: self.ep_timesteps = 0 self.num_comfort_violation = 0 - # In the first episode, logger doesn't have these attributes - if(hasattr(self, 'cumulative_reward')): - self.logger.record('episode/cumulative_reward', - self.cumulative_reward) - self.logger.record('episode/mean_reward', self.mean_reward) - self.logger.record('episode/mean_power', self.mean_power) - self.logger.record('episode/comfort_violation(%)', - self.comfort_violation) - self.logger.record('episode/mean_comfort_penalty', - self.mean_term_comfort) - self.logger.record('episode/mean_power_penalty', - self.mean_term_power) + # During first episode, as it not finished, it shouldn't be recording + if hasattr(self, 'episode_metrics'): + for key, metric in self.episode_metrics.items(): + self.logger.record( + 'episode/'+key, metric) return True @@ -139,6 +143,7 @@ def __init__( self.evaluations_comfort_violation = [] self.evaluations_comfort_penalty = [] self.evaluations_power_penalty = [] + self.evaluation_metrics = {} def _on_step(self) -> bool: @@ -149,7 +154,7 @@ def _on_step(self) -> bool: # Reset success rate buffer self._is_success_buffer = [] - episode_rewards, episode_lengths, episode_powers, episode_comfort_violations, episode_comfort_penalties, episode_power_penalties = evaluate_policy( + episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties = evaluate_policy( self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes, @@ -162,14 +167,14 @@ def _on_step(self) -> bool: if self.log_path is not None: self.evaluations_timesteps.append(self.num_timesteps) - self.evaluations_results.append(episode_rewards) - self.evaluations_length.append(episode_lengths) - self.evaluations_power_consumption.append(episode_powers) + self.evaluations_results.append(episodes_rewards) + self.evaluations_length.append(episodes_lengths) + self.evaluations_power_consumption.append(episodes_powers) self.evaluations_comfort_violation.append( - episode_comfort_violations) + episodes_comfort_violations) self.evaluations_comfort_penalty.append( - episode_comfort_penalties) - self.evaluations_power_penalty.append(episode_power_penalties) + episodes_comfort_penalties) + self.evaluations_power_penalty.append(episodes_power_penalties) kwargs = {} # Save success log if present @@ -184,22 +189,27 @@ def _on_step(self) -> bool: ep_lengths=self.evaluations_length, ep_powers=self.evaluations_power_consumption, ep_comfort_violations=self.evaluations_comfort_violation, - episode_comfort_penalties=self.evaluations_comfort_penalty, - 
episode_power_penalties=self.evaluations_power_penalty, + episodes_comfort_penalties=self.evaluations_comfort_penalty, + episodes_power_penalties=self.evaluations_power_penalty, **kwargs, ) mean_reward, std_reward = np.mean( - episode_rewards), np.std(episode_rewards) + episodes_rewards), np.std(episodes_rewards) mean_ep_length, std_ep_length = np.mean( - episode_lengths), np.std(episode_lengths) - mean_ep_power, std_ep_power = np.mean( - episode_powers), np.std(episode_powers) - mean_ep_comfort_violation, mean_std_comfort_violation = np.mean( - episode_comfort_violations), np.std(episode_comfort_violations) - self.last_mean_reward = mean_reward - mean_ep_comfort_penalty = np.mean(episode_comfort_penalties) - mean_ep_power_penalty = np.mean(episode_power_penalties) + episodes_lengths), np.std(episodes_lengths) + + self.evaluation_metrics['cumulutative_reward'] = np.mean( + mean_reward) + self.evaluation_metrics['ep_length'] = mean_ep_length + self.evaluation_metrics['power_consumption'] = np.mean( + episodes_powers) + self.evaluation_metrics['comfort_violation(%)'] = np.mean( + episodes_comfort_violations) + self.evaluation_metrics['comfort_penalty'] = np.mean( + episodes_comfort_penalties) + self.evaluation_metrics['power_penalty'] = np.mean( + episodes_power_penalties) if self.verbose > 0: print( @@ -207,15 +217,8 @@ def _on_step(self) -> bool: print( f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}") # Add to current Logger - self.logger.record("eval/mean_reward", float(mean_reward)) - self.logger.record("eval/mean_ep_length", mean_ep_length) - self.logger.record("eval/mean_power_consumption", mean_ep_power) - self.logger.record("eval/mean_comfort_violation(%)", - mean_ep_comfort_violation) - self.logger.record("eval/mean_power_penalty", - mean_ep_power_penalty) - self.logger.record("eval/mean_comfort_penalty", - mean_ep_comfort_penalty) + for key, metric in self.evaluation_metrics.items(): + self.logger.record('eval/'+key, metric) if len(self._is_success_buffer) > 0: success_rate = np.mean(self._is_success_buffer) @@ -240,7 +243,7 @@ def _on_step(self) -> bool: def evaluate_policy( model: "base_class.BaseAlgorithm", env: Union[gym.Env, VecEnv], - n_eval_episodes: int = 10, + n_eval_episodes: int = 5, deterministic: bool = True, render: bool = False, callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None, @@ -298,9 +301,9 @@ def evaluate_policy( UserWarning, ) - episode_rewards, episode_lengths, episode_powers, episode_comfort_violations, episode_comfort_penalties, episode_power_penalties = [], [], [], [], [], [] + episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties = [], [], [], [], [], [] not_reseted = True - while len(episode_rewards) < n_eval_episodes: + while len(episodes_rewards) < n_eval_episodes: # Number of loops here might differ from true episodes # played, if underlying wrappers modify episode lengths. # Avoid double reset, as VecEnv are reset automatically. @@ -338,25 +341,25 @@ def evaluate_policy( if "episode" in info.keys(): # Monitor wrapper includes "episode" key in info if environment # has been wrapped with it. Use those rewards instead. 
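The customized evaluate_policy being adjusted here differs from the Stable-Baselines3 helper in that each evaluation episode also accumulates power and comfort terms from the info dict. Stripped to its essentials, the loop amounts to roughly this sketch (not the patch's exact code):

import numpy as np

def evaluate(model, env, n_episodes=2):
    """Average reward, power and comfort violation over a few evaluation episodes."""
    rewards, powers, violations = [], [], []
    for _ in range(n_episodes):
        obs, done = env.reset(), False
        ep_reward, ep_power, violation_steps, steps = 0.0, 0.0, 0, 0
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            ep_reward += reward
            ep_power += info['total_power']
            if info['comfort_penalty'] != 0:
                violation_steps += 1
            steps += 1
        rewards.append(ep_reward)
        powers.append(ep_power)
        violations.append(violation_steps / steps * 100)
    return np.mean(rewards), np.mean(powers), np.mean(violations)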
- episode_rewards.append(info["episode"]["r"]) - episode_lengths.append(info["episode"]["l"]) + episodes_rewards.append(info["episode"]["r"]) + episodes_lengths.append(info["episode"]["l"]) else: - episode_rewards.append(episode_reward) - episode_lengths.append(episode_length) - episode_powers.append(episode_power) - episode_comfort_violations.append( + episodes_rewards.append(episode_reward) + episodes_lengths.append(episode_length) + episodes_powers.append(episode_power) + episodes_comfort_violations.append( episode_steps_comfort_violation/episode_length*100) - episode_comfort_penalties.append(episode_comfort_penalty) - episode_power_penalties.append(episode_power_penalty) - - mean_reward = np.mean(episode_rewards) - std_reward = np.std(episode_rewards) - # mean_power = np.mean(episode_powers) - # std_power = np.std(episode_powers) - # mean_comfort_violation= np.mean(episode_comfort_violations) - # std_comfort_violation= np.std(episode_comfort_violations) + episodes_comfort_penalties.append(episode_comfort_penalty) + episodes_power_penalties.append(episode_power_penalty) + + mean_reward = np.mean(episodes_rewards) + std_reward = np.std(episodes_rewards) + # mean_power = np.mean(episodes_powers) + # std_power = np.std(episodes_powers) + # mean_comfort_violation= np.mean(episodes_comfort_violations) + # std_comfort_violation= np.std(episodes_comfort_violations) if reward_threshold is not None: assert mean_reward > reward_threshold, "Mean reward below threshold: " f"{mean_reward:.2f} < {reward_threshold:.2f}" if return_episode_rewards: - return episode_rewards, episode_lengths, episode_powers, episode_comfort_violations, episode_comfort_penalties, episode_power_penalties + return episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties return mean_reward, std_reward From fa22a215f85b370ba506fa2b2668acfc32e3aaab Mon Sep 17 00:00:00 2001 From: Alejandro Date: Thu, 27 May 2021 16:24:15 +0200 Subject: [PATCH 07/24] Fixed bug in tensorboard action record (discrete environments) --- energym/utils/callbacks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index d7df17f370..c16bc8b092 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -50,6 +50,8 @@ def _on_step(self) -> bool: # ACTION variables = self.training_env.get_attr('variables')[0]['action'] action = self.locals['actions'][-1] + if self.training_env.get_attr('flag_discrete')[0]: + action=self.training_env.get_attr('action_mapping')[0][action] for i, variable in enumerate(variables): self.logger.record( 'action/'+variable, action[i]) From 13debf6d795bcf15962d89421f603d3cce2417e6 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Mon, 31 May 2021 10:03:36 +0200 Subject: [PATCH 08/24] Fixed bug logger DivisionByZero Exception in empty episodes - fix #40 --- energym/utils/callbacks.py | 23 +++++++++++++++-------- energym/utils/common.py | 8 ++++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index c16bc8b092..98c4f71a15 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -70,21 +70,25 @@ def _on_step(self) -> bool: if self.locals['dones'][-1]: # store last episode metrics self.episode_metrics = {} - self.episode_metrics['cumulutative_reward'] = np.sum( + self.episode_metrics['ep_length'] = self.ep_timesteps + self.episode_metrics['cumulative_reward'] = np.sum( self.ep_rewards) 
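The point of this patch is the division guard that appears a little further down in the hunk: an episode that ends before any timestep is logged would otherwise raise ZeroDivisionError when computing the comfort-violation percentage. The guarded computation in isolation (function name is illustrative):

import numpy as np

def comfort_violation_pct(violation_steps, total_steps):
    """Share of timesteps with a non-zero comfort penalty; NaN for empty episodes."""
    try:
        return violation_steps / total_steps * 100
    except ZeroDivisionError:
        return np.nan

print(comfort_violation_pct(12, 96))  # 12.5
print(comfort_violation_pct(0, 0))    # nan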
self.episode_metrics['mean_reward'] = np.mean(self.ep_rewards) self.episode_metrics['mean_power'] = np.mean(self.ep_powers) - self.episode_metrics['cumulutative_power'] = np.sum(self.ep_powers) + self.episode_metrics['cumulative_power'] = np.sum(self.ep_powers) self.episode_metrics['mean_comfort_penalty'] = np.mean( self.ep_term_comfort) - self.episode_metrics['cumulutative_comfort_penalty'] = np.sum( + self.episode_metrics['cumulative_comfort_penalty'] = np.sum( self.ep_term_comfort) self.episode_metrics['mean_power_penalty'] = np.mean( self.ep_term_energy) - self.episode_metrics['cumulutative_power_penalty'] = np.sum( + self.episode_metrics['cumulative_power_penalty'] = np.sum( self.ep_term_energy) - self.episode_metrics['comfort_violation_time(%)'] = self.num_comfort_violation / \ + try: + self.episode_metrics['comfort_violation_time(%)'] = self.num_comfort_violation / \ self.ep_timesteps*100 + except ZeroDivisionError: + self.episode_metrics['comfort_violation_time(%)'] = np.nan # reset episode info self.ep_rewards = [] @@ -201,7 +205,7 @@ def _on_step(self) -> bool: mean_ep_length, std_ep_length = np.mean( episodes_lengths), np.std(episodes_lengths) - self.evaluation_metrics['cumulutative_reward'] = np.mean( + self.evaluation_metrics['cumulative_reward'] = np.mean( mean_reward) self.evaluation_metrics['ep_length'] = mean_ep_length self.evaluation_metrics['power_consumption'] = np.mean( @@ -349,8 +353,11 @@ def evaluate_policy( episodes_rewards.append(episode_reward) episodes_lengths.append(episode_length) episodes_powers.append(episode_power) - episodes_comfort_violations.append( - episode_steps_comfort_violation/episode_length*100) + try: + episodes_comfort_violations.append( + episode_steps_comfort_violation/episode_length*100) + except ZeroDivisionError: + episodes_comfort_violations.append(np.nan) episodes_comfort_penalties.append(episode_comfort_penalty) episodes_power_penalties.append(episode_power_penalty) diff --git a/energym/utils/common.py b/energym/utils/common.py index 17c5ded332..d76f92075e 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -303,8 +303,12 @@ def log_episode(self, episode): ep_mean_reward = np.mean(self.rewards) ep_total_reward = np.sum(self.rewards) ep_mean_power = np.mean(self.powers) - comfort_violation = ( - self.comfort_violation_timesteps/self.total_timesteps*100) + try: + comfort_violation = ( + self.comfort_violation_timesteps/self.total_timesteps*100) + except ZeroDivisionError: + comfort_violation = np.nan + # write steps_info in monitor.csv with open(self.log_file, 'w', newline='') as file_obj: From 584df1f7aeb1fbf4612ed66097dce6d71e476abb Mon Sep 17 00:00:00 2001 From: Alejandro Date: Mon, 31 May 2021 12:48:42 +0200 Subject: [PATCH 09/24] Fixed #41, KeyError with logger in DDPG, DQN and SAC --- energym/utils/callbacks.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index 98c4f71a15..41678942fa 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -4,6 +4,8 @@ import os from energym.utils.wrappers import NormalizeObservation +from pprint import pprint + import warnings from typing import Any, Callable, Dict, List, Optional, Tuple, Union from stable_baselines3.common import base_class @@ -49,7 +51,14 @@ def _on_step(self) -> bool: # ACTION variables = self.training_env.get_attr('variables')[0]['action'] - action = self.locals['actions'][-1] + try: + action = self.locals['actions'][-1] + except 
KeyError: + try: + action = self.locals['action'][-1] + except KeyError: + print('Algorithm action key in locals dict unknown') + if self.training_env.get_attr('flag_discrete')[0]: action=self.training_env.get_attr('action_mapping')[0][action] for i, variable in enumerate(variables): @@ -58,7 +67,14 @@ def _on_step(self) -> bool: # Store episode data info = self.locals['infos'][-1] - self.ep_rewards.append(self.locals['rewards'][-1]) + try: + self.ep_rewards.append(self.locals['rewards'][-1]) + except KeyError: + try: + self.ep_rewards.append(self.locals['reward'][-1]) + except KeyError: + print('Algorithm reward key in locals dict unknown') + self.ep_powers.append(info['total_power']) self.ep_term_comfort.append(info['comfort_penalty']) self.ep_term_energy.append(info['total_power_no_units']) @@ -67,7 +83,14 @@ def _on_step(self) -> bool: self.ep_timesteps += 1 # If episode ends, store summary of episode and reset - if self.locals['dones'][-1]: + try: + done= self.locals['dones'][-1] + except KeyError: + try: + done= self.locals['done'][-1] + except KeyError: + print('Algorithm done key in locals dict unknown') + if done: # store last episode metrics self.episode_metrics = {} self.episode_metrics['ep_length'] = self.ep_timesteps From 9932c4e71ac7e86f3c85df36ccd39dfc4e68de1b Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 1 Jun 2021 12:54:37 +0200 Subject: [PATCH 10/24] Added tensorboard compatibility with OnPolicy and OffPolicy algorithms --- energym/utils/callbacks.py | 39 ++++++++++++++++++++--------- examples/mlflow_A2C.py | 51 +++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index 41678942fa..a65e5f36d1 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -17,8 +17,11 @@ class LoggerCallback(BaseCallback): Custom callback for plotting additional values in tensorboard. 
""" - def __init__(self, verbose=0): + def __init__(self, energym_logger=False, verbose=0): super(LoggerCallback, self).__init__(verbose) + + self.energym_logger = energym_logger + self.ep_rewards = [] self.ep_powers = [] self.ep_term_comfort = [] @@ -27,26 +30,38 @@ def __init__(self, verbose=0): self.ep_timesteps = 0 def _on_training_start(self): - self.training_env.env_method('deactivate_logger') + # energym logger + if self.energym_logger: + self.training_env.env_method('activate_logger') + else: + self.training_env.env_method('deactivate_logger') + + # record method depending on the type of algorithm + + if 'OnPolicyAlgorithm' in self.globals.keys(): + self.record = self.logger.record + elif 'OffPolicyAlgorithm' in self.globals.keys(): + self.record = self.logger.record_mean + else: + raise KeyError def _on_step(self) -> bool: # OBSERVATION variables = self.training_env.get_attr('variables')[0]['observation'] - # log normalized and original values if self.training_env.env_is_wrapped(wrapper_class=NormalizeObservation)[0]: obs_normalized = self.locals['new_obs'][-1] obs = self.training_env.env_method('get_unwrapped_obs')[-1] for i, variable in enumerate(variables): - self.logger.record( + self.record( 'normalized_observation/'+variable, obs_normalized[i]) - self.logger.record( + self.record( 'observation/'+variable, obs[i]) # Only original values else: obs = self.locals['new_obs'][-1] for i, variable in enumerate(variables): - self.logger.record( + self.record( 'observation/'+variable, obs[i]) # ACTION @@ -58,11 +73,11 @@ def _on_step(self) -> bool: action = self.locals['action'][-1] except KeyError: print('Algorithm action key in locals dict unknown') - + if self.training_env.get_attr('flag_discrete')[0]: - action=self.training_env.get_attr('action_mapping')[0][action] + action = self.training_env.get_attr('action_mapping')[0][action] for i, variable in enumerate(variables): - self.logger.record( + self.record( 'action/'+variable, action[i]) # Store episode data @@ -84,10 +99,10 @@ def _on_step(self) -> bool: # If episode ends, store summary of episode and reset try: - done= self.locals['dones'][-1] + done = self.locals['dones'][-1] except KeyError: try: - done= self.locals['done'][-1] + done = self.locals['done'][-1] except KeyError: print('Algorithm done key in locals dict unknown') if done: @@ -109,7 +124,7 @@ def _on_step(self) -> bool: self.ep_term_energy) try: self.episode_metrics['comfort_violation_time(%)'] = self.num_comfort_violation / \ - self.ep_timesteps*100 + self.ep_timesteps*100 except ZeroDivisionError: self.episode_metrics['comfort_violation_time(%)'] = np.nan diff --git a/examples/mlflow_A2C.py b/examples/mlflow_A2C.py index 4c21d6efff..25080edec3 100644 --- a/examples/mlflow_A2C.py +++ b/examples/mlflow_A2C.py @@ -15,7 +15,9 @@ from energym.utils.callbacks import LoggerCallback, LoggerEvalCallback from energym.utils.wrappers import NormalizeObservation -from stable_baselines3 import A2C + +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC from stable_baselines3.common.callbacks import EvalCallback, BaseCallback, CallbackList from stable_baselines3.common.vec_env import DummyVecEnv @@ -58,6 +60,27 @@ #### TRAINING #### # Build model + # model = DQN('MlpPolicy', env, verbose=1, + # learning_rate=.0001, + # buffer_size=1000000, + # learning_starts=50000, + # batch_size=32, + # tau=1.0, + # gamma=.99, + # train_freq=4, + # gradient_steps=1, + # target_update_interval=10000, 
+ # exploration_fraction=.1, + # exploration_initial_eps=1.0, + # exploration_final_eps=.05, + # max_grad_norm=10, + # tensorboard_log='./tensorboard_log/') + # The noise objects for DDPG + # n_actions = env.action_space.shape[-1] + # action_noise = NormalActionNoise(mean=np.zeros( + # n_actions), sigma=0.1 * np.ones(n_actions)) + # model = DDPG("MlpPolicy", env, action_noise=action_noise, verbose=1, + # tensorboard_log='./tensorboard_log/') model = A2C('MlpPolicy', env, verbose=1, learning_rate=args.learning_rate, n_steps=args.n_steps, @@ -68,24 +91,40 @@ max_grad_norm=args.max_grad_norm, rms_prop_eps=args.rms_prop_eps, tensorboard_log='./tensorboard_log/') + # model = PPO('MlpPolicy', env, verbose=1, + # learning_rate=.0003, + # n_steps=2048, + # batch_size=64, + # n_epochs=10, + # gamma=.99, + # gae_lambda=.95, + # clip_range=.2, + # ent_coef=0, + # vf_coef=.5, + # max_grad_norm=.5, + # tensorboard_log='./tensorboard_log/') + + # model = SAC(policy='MlpPolicy', env=env, + # tensorboard_log='./tensorboard_log/') n_timesteps_episode = env.simulator._eplus_one_epi_len / \ env.simulator._eplus_run_stepsize timesteps = n_episodes * n_timesteps_episode + 501 env = DummyVecEnv([lambda: env]) - env.env_method('activate_logger') + # env.env_method('activate_logger') # Callbacks - freq = 8 # evaluate every N episodes + freq = 2 # evaluate every N episodes eval_callback = LoggerEvalCallback(env, best_model_save_path='./best_models/' + name + '/', log_path='./best_models/' + name + '/', eval_freq=n_timesteps_episode * freq, - deterministic=True, render=False, n_eval_episodes=2) - log_callback = LoggerCallback() + deterministic=True, render=False, n_eval_episodes=1) + log_callback = LoggerCallback(energym_logger=False) callback = CallbackList([log_callback, eval_callback]) # Training - model.learn(total_timesteps=timesteps, callback=callback) + model.learn(total_timesteps=timesteps, log_interval=500, + n_eval_episodes=1, eval_freq=freq, callback=callback) model.save(name) #### LOAD MODEL #### From 53c458fb870b031ca41e2318237dd5fdf3470ecd Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 1 Jun 2021 13:05:21 +0200 Subject: [PATCH 11/24] Rename files that start with test word but they aren't test to be executed by pytest - close #42 --- examples/{test_mlflow.py => try_mlflow.py} | 0 examples/{test_wrappers.py => try_wrappers.py} | 7 ++++--- test_env.py => try_env.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) rename examples/{test_mlflow.py => try_mlflow.py} (100%) rename examples/{test_wrappers.py => try_wrappers.py} (74%) rename test_env.py => try_env.py (92%) diff --git a/examples/test_mlflow.py b/examples/try_mlflow.py similarity index 100% rename from examples/test_mlflow.py rename to examples/try_mlflow.py diff --git a/examples/test_wrappers.py b/examples/try_wrappers.py similarity index 74% rename from examples/test_wrappers.py rename to examples/try_wrappers.py index 781f4343f1..48b980507b 100644 --- a/examples/test_wrappers.py +++ b/examples/try_wrappers.py @@ -18,8 +18,9 @@ a = env.action_space.sample() obs, reward, done, info = env.step(a) rewards.append(reward) - if info['month'] != current_month: # display results every month + if info['month'] != current_month: # display results every month current_month = info['month'] print('Reward: ', sum(rewards), info) - print('Episode ', i, 'Mean reward: ', np.mean(rewards), 'Cumulative reward: ', sum(rewards)) -env.close() \ No newline at end of file + print('Episode ', i, 'Mean reward: ', np.mean( + rewards), 'Cumulative reward: ', 
sum(rewards)) +env.close() diff --git a/test_env.py b/try_env.py similarity index 92% rename from test_env.py rename to try_env.py index 984e5ea5e9..a66ed1e3dd 100644 --- a/test_env.py +++ b/try_env.py @@ -2,7 +2,7 @@ import energym import numpy as np -env = gym.make('Eplus-demo-v1') +env = gym.make('Eplus-continuous-hot-v1') for i in range(1): obs = env.reset() rewards = [] From 0aacbca9c5902d5e612701cbac40d218d6fccfde Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 1 Jun 2021 14:35:38 +0200 Subject: [PATCH 12/24] Updated CSV Logger and added docstring documentation --- energym/envs/eplus_env.py | 4 +- energym/utils/common.py | 95 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/energym/envs/eplus_env.py b/energym/envs/eplus_env.py index 9b2e98ee07..6e66bfcf79 100644 --- a/energym/envs/eplus_env.py +++ b/energym/envs/eplus_env.py @@ -110,12 +110,12 @@ def __init__( # Headers for csv loggers monitor_header_list = ['timestep,month,day,hour']+self.variables['observation'] + \ self.variables['action']+['time (seconds)', 'reward', - 'total_power_no_units', 'comfort_penalty', 'done'] + 'power_penalty', 'comfort_penalty', 'done'] self.monitor_header = '' for element_header in monitor_header_list: self.monitor_header += element_header+',' self.monitor_header = self.monitor_header[:-1] - self.progress_header = 'episode,cumulative_reward,mean_reward,mean_power_consumption,comfort_violation (%),num_timesteps,time_elapsed' + self.progress_header = 'episode_num,cumulative_reward,mean_reward,cumulative_power_consumption,mean_power_consumption,cumulative_comfort_penalty,mean_comfort_penalty,cumulative_power_penalty,mean_power_penalty,comfort_violation (%),length(timesteps),time_elapsed(seconds)' # Create simulation logger, by default is active (flag=True) self.logger = CSVLogger(monitor_header=self.monitor_header, progress_header=self.progress_header, diff --git a/energym/utils/common.py b/energym/utils/common.py index d76f92075e..7f20e599cd 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -220,7 +220,7 @@ def create_variable_weather(weather_data, original_epw_file, columns: list = ['d def ranges_getter(output_path, last_result=None): - """Given a path with simulations outputs, this function is used to extract max and min absolute valors of all episodes in each variable. If a dict ranges is given, will be updated + """Given a path with simulations outputs, this function is used to extract max and min absolute valors of all episodes in each variable. If a dict ranges is given, will be updated. Args: output_path (str): path with simulations directories (Eplus-env-*). @@ -257,7 +257,22 @@ def ranges_getter(output_path, last_result=None): class Logger(): + """ + Energym terminal logger for simulation executions. + """ + def getLogger(self, name, level, formatter): + """Return Energym logger for the progress output in terminal. + + Args: + name (str): logger name + level (str): logger level + formatter (str): logger formatter + + Returns: + logging.logger + + """ logger = logging.getLogger(name) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logging.Formatter(formatter)) @@ -268,6 +283,20 @@ def getLogger(self, name, level, formatter): class CSVLogger(object): + """CSV Logger for agent interaction with environment. + + :param monitor_header: CSV header for sub_run_N/monitor.csv which record interaction step by step. 
+ :param progress_header: CSV header for res_N/progress.csv which record main data episode by episode. + :param log_file: log_file path for monitor.csv, there will be one CSV per episode. + :param log_progress_file: log_file path for progress.csv, there will be only one CSV per whole simulation. + :param flag: This flag is used to activate (True) or deactivate (False) Logger in real time. + :param steps_data, rewards, powers, etc: These arrays are used to record steps data to elaborate main data for progress.csv later. + :param total_timesteps: Current episode timesteps executed. + :param total_time_elapsed: Current episode time elapsed (simulation seconds). + :param comfort_violation_timesteps: Current episode timesteps whose comfort_penalty!=0. + :param steps_data: It is a array of str's. Each element belong to a step data. + """ + def __init__(self, monitor_header, progress_header, log_progress_file, log_file=None, flag=True): self.monitor_header = monitor_header @@ -280,11 +309,27 @@ def __init__(self, monitor_header, progress_header, log_progress_file, log_file= self.steps_data = [self.monitor_header.split(',')] self.rewards = [] self.powers = [] + self.comfort_penalties = [] + self.power_penalties = [] self.total_timesteps = 0 self.total_time_elapsed = 0 self.comfort_violation_timesteps = 0 def log_step(self, timestep, date, observation, action, simulation_time, reward, total_power_no_units, comfort_penalty, power, done): + """Log step information and store it in steps_data param. + + Args: + timestep (int): Current episode timestep in simulation. + date (list): Current date [month,day,hour] in simulation. + observation (list): Values that belong to current observation. + action (list): Values that belong to current action. + simulation_time (float): Total time elapsed in current episode (seconds). + reward (float): Current reward achieved. + total_power_no_units (float): Power consumption penalty depending on reward function. + comfort_penalty (float): Temperature comfort penalty depending on reward function. + power (float): Power consumption in current step (W). + done (bool): Spicifies if this step terminates episode or not. + """ if self.flag: row_contents = [timestep] + list(date) + list(observation) + \ list(action) + [simulation_time, reward, @@ -293,23 +338,32 @@ def log_step(self, timestep, date, observation, action, simulation_time, reward, # Store step information for episode self._store_step_information( - reward, power, comfort_penalty, timestep, simulation_time) + reward, power, comfort_penalty, total_power_no_units, timestep, simulation_time) else: pass def log_episode(self, episode): + """Log episode main information using steps_data param. + + Args: + episode (int): Current simulation episode number. 
+ """ if self.flag: # statistics metrics for whole episode ep_mean_reward = np.mean(self.rewards) - ep_total_reward = np.sum(self.rewards) + ep_cumulative_reward = np.sum(self.rewards) + ep_cumulative_power = np.sum(self.powers) ep_mean_power = np.mean(self.powers) + ep_cumulative_comfort_penalty = np.sum(self.comfort_penalties) + ep_mean_comfort_penalty = np.mean(self.comfort_penalties) + ep_cumulative_power_penalty = np.sum(self.power_penalties) + ep_mean_power_penalty = np.mean(self.power_penalties) try: comfort_violation = ( self.comfort_violation_timesteps/self.total_timesteps*100) except ZeroDivisionError: comfort_violation = np.nan - # write steps_info in monitor.csv with open(self.log_file, 'w', newline='') as file_obj: # Create a writer object from csv module @@ -323,8 +377,8 @@ def log_episode(self, episode): file_obj.write(self.progress_header) # building episode row - row_contents = [episode, ep_total_reward, ep_mean_reward, ep_mean_power, comfort_violation, - self.total_timesteps, self.total_time_elapsed] + row_contents = [episode, ep_cumulative_reward, ep_mean_reward, ep_cumulative_power, ep_mean_power, ep_cumulative_comfort_penalty, + ep_mean_comfort_penalty, ep_cumulative_power_penalty, ep_mean_power_penalty, comfort_violation, self.total_timesteps, self.total_time_elapsed] with open(self.log_progress_file, 'a+', newline='') as file_obj: # Create a writer object from csv module csv_writer = csv.writer(file_obj) @@ -337,6 +391,11 @@ def log_episode(self, episode): pass def set_log_file(self, new_log_file): + """Change log_file path for monitor.csv when an episode ends. + + Args: + new_log_file (str): New log path depending on simulation. + """ if self.flag: self.log_file = new_log_file if self.log_file: @@ -345,26 +404,48 @@ def set_log_file(self, new_log_file): else: pass - def _store_step_information(self, reward, power, comfort_penalty, timestep, simulation_time): + def _store_step_information(self, reward, power, comfort_penalty, power_penalty, timestep, simulation_time): + """Store relevant data to episode summary in progress.csv. + + Args: + reward (float): Current reward achieved. + power (float): Power consumption in current step (W). + comfort_penalty (float): Temperature comfort penalty depending on reward function. + power_penalty (float): Power consumption penalty depending on reward function. + timestep (int): Current episode timestep in simulation. + simulation_time (float): Total time elapsed in current episode (seconds). + """ if reward is not None: self.rewards.append(reward) if power is not None: self.powers.append(power) + if comfort_penalty is not None: + self.comfort_penalties.append(comfort_penalty) + if power_penalty is not None: + self.power_penalties.append(power_penalty) if comfort_penalty != 0: self.comfort_violation_timesteps += 1 self.total_timesteps = timestep self.total_time_elapsed = simulation_time def _reset_logger(self): + """Reset relevant data to next episode summary in progress.csv. + """ self.steps_data = [self.monitor_header.split(',')] self.rewards = [] self.powers = [] + self. 
comfort_penalties = [] + self.power_penalties = [] self.total_timesteps = 0 self.total_time_elapsed = 0 self.comfort_violation_timesteps = 0 def activate_flag(self): + """Activate Energym CSV logger + """ self.flag = True def deactivate_flag(self): + """Deactivate Energym CSV logger + """ self.flag = False From d95548aa48d42170266e01987125df3a75881733 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 1 Jun 2021 14:36:20 +0200 Subject: [PATCH 13/24] Updated documentation output format for progress.csv logger --- doc/source/pages/output.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/pages/output.rst b/doc/source/pages/output.rst index 3c5b9e5de2..10c833b5d0 100644 --- a/doc/source/pages/output.rst +++ b/doc/source/pages/output.rst @@ -26,6 +26,6 @@ When a simulation is run, this generate a directory called `Eplus-env- - A copy of **socket.cfg** and **utilSocket.idf** which are being used in order to communication interface with Energyplus during simulation. - **monitor.csv**: This records all interactions Agent-Enviroment during the episode timestep by timestep, the format is: *timestep, observation_values, action_values, simulation_time (seconds), reward, done*. - **output/**: This directory has EnergyPlus environment output. -- **progress.csv**: This file has information about general simulation results. There is a row per episode and it records most important data. Currently, the format is: *episode,mean_reward,cumulative_reward,num_timestep,time_elapsed*. +- **progress.csv**: This file has information about general simulation results. There is a row per episode and it records most important data. Currently, the format is: *episode_num,cumulative_reward,mean_reward,cumulative_power_consumption,mean_power_consumption,cumulative_comfort_penalty,mean_comfort_penalty,cumulative_power_penalty,mean_power_penalty,comfort_violation (%),length(timesteps),time_elapsed(seconds)*. .. note:: For more information about EnergyPlus output, visit `EnegyPlus documentation `__. 
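With the progress.csv layout documented above, a short sketch of how those per-episode summaries can be inspected after training; the results path is illustrative, since each run creates its own Eplus-env-*-res* directory:

import pandas as pd

# Illustrative path to the results directory of a finished run
progress = pd.read_csv('./Eplus-env-demo-v1-res1/progress.csv')

# Column names follow the progress_header defined in eplus_env.py
print(progress[['episode_num', 'cumulative_reward', 'comfort_violation (%)']])
best = progress.loc[progress['cumulative_reward'].idxmax(), 'episode_num']
print('Best episode so far:', int(best))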
From 3398d3f5a4fcffb529ec1476eb81b7883d3d5542 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 2 Jun 2021 12:39:10 +0200 Subject: [PATCH 14/24] Added tests for Stable-baselines3 DDPG and SAC with energym --- tests/test_stable_baselines.py | 102 ++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tests/test_stable_baselines.py b/tests/test_stable_baselines.py index 5268302a77..0ea3cc3f3c 100644 --- a/tests/test_stable_baselines.py +++ b/tests/test_stable_baselines.py @@ -7,6 +7,7 @@ import pytest import stable_baselines3 +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise TIMESTEPS = 1000 @@ -135,7 +136,6 @@ def test_stable_DQN(env_name, request): exploration_initial_eps=1.0, exploration_final_eps=.05, max_grad_norm=10) - pass else: model = stable_baselines3.DQN('MlpPolicy', env, verbose=1, @@ -174,3 +174,103 @@ def test_stable_DQN(env_name, request): assert info['timestep'] == 1 env.close() + + +@pytest.mark.parametrize( + 'env_name', + [ + ( + 'env_demo_discrete' + ), + ( + 'env_demo_continuous' + ), + ] +) +def test_stable_DDPG(env_name, request): + + env = request.getfixturevalue(env_name) + # DDPG must fail in discrete environments + if env_name == 'env_demo_discrete': + with pytest.raises(IndexError): + env.action_space.shape[-1] + with pytest.raises(AssertionError): + model = stable_baselines3.DDPG( + "MlpPolicy", env, verbose=1) + else: + # Action noise + n_actions = env.action_space.shape[-1] + action_noise = NormalActionNoise(mean=np.zeros( + n_actions), sigma=0.1 * np.ones(n_actions)) + # model + model = stable_baselines3.DDPG( + "MlpPolicy", env, action_noise=action_noise, verbose=1) + + model.learn(total_timesteps=TIMESTEPS) + + # Check model state + assert model.action_space == env.action_space + assert model.env.action_space == env.action_space + + assert type( + model.policy) == stable_baselines3.td3.policies.TD3Policy + + # Check model works + + obs = env.reset() + a, _ = model.predict(obs) + obs, reward, done, info = env.step(a) + + assert reward is not None and reward < 0 + assert a is not None + assert type(done) == bool + assert info['timestep'] == 1 + + env.close() + + +@pytest.mark.parametrize( + 'env_name', + [ + ( + 'env_demo_discrete' + ), + ( + 'env_demo_continuous' + ), + ] +) +def test_stable_SAC(env_name, request): + env = request.getfixturevalue(env_name) + # SAC must fail in discrete environments + if env_name == 'env_demo_discrete': + with pytest.raises(AssertionError): + model = stable_baselines3.SAC( + "MlpPolicy", env, verbose=1) + else: + # model + model = stable_baselines3.SAC( + "MlpPolicy", env, verbose=1) + + model.learn(total_timesteps=TIMESTEPS) + + # Check model state + assert model.action_space == env.action_space + assert model.env.action_space == env.action_space + + print(type(model.policy)) + assert type( + model.policy) == stable_baselines3.sac.policies.SACPolicy + + # Check model works + + obs = env.reset() + a, _ = model.predict(obs) + obs, reward, done, info = env.step(a) + + assert reward is not None and reward < 0 + assert a is not None + assert type(done) == bool + assert info['timestep'] == 1 + + env.close() From a8ec3d784ff7772c548bde4fa3520188164d5880 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Mon, 7 Jun 2021 11:06:14 +0200 Subject: [PATCH 15/24] Energym Logger log monitor_normalized.csv when env is wrapped with NormalizedObservation --- energym/envs/eplus_env.py | 2 +- energym/utils/common.py | 19 +++++++++++++++++++ energym/utils/wrappers.py 
| 17 +++++++++++++++++ tests/test_env.py | 25 +++++++++++++++++++------ 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/energym/envs/eplus_env.py b/energym/envs/eplus_env.py index 6e66bfcf79..12a6e53f91 100644 --- a/energym/envs/eplus_env.py +++ b/energym/envs/eplus_env.py @@ -150,7 +150,7 @@ def step(self, action): else: setpoints = action else: - print("ERROR: ", action) + print("ERROR: ", type(action)) action_ = list(setpoints) else: action_ = list(action) diff --git a/energym/utils/common.py b/energym/utils/common.py index 7f20e599cd..b3c78744cd 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -307,6 +307,7 @@ def __init__(self, monitor_header, progress_header, log_progress_file, log_file= # episode data self.steps_data = [self.monitor_header.split(',')] + self.steps_data_normalized = [self.monitor_header.split(',')] self.rewards = [] self.powers = [] self.comfort_penalties = [] @@ -342,6 +343,15 @@ def log_step(self, timestep, date, observation, action, simulation_time, reward, else: pass + def log_step_normalize(self, timestep, date, observation, action, simulation_time, reward, total_power_no_units, comfort_penalty, done): + if self.flag: + row_contents = [timestep] + list(date) + list(observation) + \ + list(action) + [simulation_time, reward, + total_power_no_units, comfort_penalty, done] + self.steps_data_normalized.append(row_contents) + else: + pass + def log_episode(self, episode): """Log episode main information using steps_data param. @@ -371,6 +381,14 @@ def log_episode(self, episode): # Add contents of list as last row in the csv file csv_writer.writerows(self.steps_data) + # Write normalize steps_info in monitor_normalized.csv + if len(self.steps_data_normalized) > 1: + with open(self.log_file[:-4]+'_normalized.csv', 'w', newline='') as file_obj: + # Create a writer object from csv module + csv_writer = csv.writer(file_obj) + # Add contents of list as last row in the csv file + csv_writer.writerows(self.steps_data_normalized) + # Create CSV file with header if it's required for progress.csv if not os.path.isfile(self.log_progress_file): with open(self.log_progress_file, 'a', newline='\n') as file_obj: @@ -432,6 +450,7 @@ def _reset_logger(self): """Reset relevant data to next episode summary in progress.csv. """ self.steps_data = [self.monitor_header.split(',')] + self.steps_data_normalized = [self.monitor_header.split(',')] self.rewards = [] self.powers = [] self. comfort_penalties = [] diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index 328a57386f..c3dddd7b3f 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -20,6 +20,23 @@ def __init__(self, env, ranges=RANGES_5ZONE): self.unwrapped_observation = None self.ranges = ranges + def step(self, action): + observation, reward, done, info = self.env.step(action) + normalized_obs = self.observation(observation) + if self.flag_discrete: + action_ = self.action_mapping[action] + self.logger.log_step_normalize(timestep=info['timestep'], + date=[info['month'], + info['day'], info['hour']], + observation=normalized_obs, + action=action_, + simulation_time=info['time_elapsed'], + reward=reward, + total_power_no_units=info['total_power_no_units'], + comfort_penalty=info['comfort_penalty'], + done=done) + return normalized_obs, reward, done, info + def observation(self, obs): """Applies normalization to observation. 
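The pattern this patch introduces is easy to lose inside the diff: the CSV logger keeps a second row buffer for normalized steps, the NormalizeObservation wrapper feeds it from its own step(), and the extra file is only written when that buffer actually received rows. A minimal standalone sketch of that two-buffer idea follows; the class name TwoStreamCSVLogger, the helper names and the column layout are illustrative assumptions, not energym's actual API.

import csv
import numpy as np


class TwoStreamCSVLogger:
    """Sketch: buffer raw and normalized step rows, flush each buffer
    to its own CSV at the end of an episode."""

    def __init__(self, log_file, header):
        self.log_file = log_file
        self.header = header
        self.steps_data = [header]
        self.steps_data_normalized = [header]

    def log_step(self, timestep, observation, reward, done):
        # raw observation, always recorded
        self.steps_data.append([timestep, *observation, reward, done])

    def log_step_normalized(self, timestep, observation, reward, done):
        # normalized observation, only recorded when a wrapper pushes it
        self.steps_data_normalized.append([timestep, *observation, reward, done])

    def flush_episode(self):
        with open(self.log_file, 'w', newline='') as f:
            csv.writer(f).writerows(self.steps_data)
        # the *_normalized.csv file only appears if normalized rows were logged
        if len(self.steps_data_normalized) > 1:
            with open(self.log_file[:-4] + '_normalized.csv', 'w', newline='') as f:
                csv.writer(f).writerows(self.steps_data_normalized)
        self.steps_data = [self.header]
        self.steps_data_normalized = [self.header]


logger = TwoStreamCSVLogger('monitor.csv', ['timestep', 'obs0', 'obs1', 'reward', 'done'])
low, high = np.array([0.0, 0.0]), np.array([30.0, 100.0])   # per-variable ranges (example values)
raw = np.array([21.5, 55.0])
norm = (raw - low) / (high - low)                            # min-max scaling
logger.log_step(1, raw.tolist(), -0.5, False)
logger.log_step_normalized(1, norm.round(4).tolist(), -0.5, False)
logger.flush_episode()   # writes monitor.csv and monitor_normalized.csv

Keeping the raw rows in monitor.csv untouched and the scaled rows in a sibling file means existing tooling keeps working, while wrapped runs can still be inspected in normalized units.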
diff --git a/tests/test_env.py b/tests/test_env.py index 5589a0cf1e..ba78ce7b17 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,7 +1,7 @@ import pytest from random import randint import gym -import energym +import energym.utils.wrappers import os import csv from stable_baselines3.common.env_checker import check_env @@ -45,17 +45,24 @@ def test_close(env_demo): assert env_demo.simulator._conn == None -def test_loggers(env_demo): - - logger = env_demo.logger +@pytest.mark.parametrize('env_name', [('env_demo'), ('env_wrapper'), ]) +def test_loggers(env_name, request): + env = request.getfixturevalue(env_name) + logger = env.logger # Check CSV's have been created and linked in simulator correctly - assert logger.log_progress_file == env_demo.simulator._env_working_dir_parent+'/progress.csv' - assert logger.log_file == env_demo.simulator._eplus_working_dir+'/monitor.csv' + assert logger.log_progress_file == env.simulator._env_working_dir_parent+'/progress.csv' + assert logger.log_file == env.simulator._eplus_working_dir+'/monitor.csv' assert os.path.isfile(logger.log_progress_file) assert os.path.isfile(logger.log_file) + # If env is wrapped with normalize obs... + if(type(env) == energym.utils.wrappers.NormalizeObservation): + assert os.path.isfile(logger.log_file[:-4]+'_normalized.csv') + else: + assert not os.path.isfile(logger.log_file[:-4]+'_normalized.csv') + # Check headers with open(logger.log_file, mode='r', newline='') as csvfile: reader = csv.reader(csvfile, delimiter=',') @@ -67,6 +74,12 @@ def test_loggers(env_demo): for row in reader: assert ','.join(row)+'\n' == logger.progress_header break + if(type(env) == energym.utils.wrappers.NormalizeObservation): + with open(logger.log_file[:-4]+'_normalized.csv', mode='r', newline='') as csvfile: + reader = csv.reader(csvfile, delimiter=',') + for row in reader: + assert ','.join(row) == logger.monitor_header + break def test_all_environments(): From a361dc83ec69a0299845c86f833248d48aae4546 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Mon, 7 Jun 2021 12:25:34 +0200 Subject: [PATCH 16/24] Fixed logger bug for normalized observation (rows were bigger than CSV header) --- energym/utils/wrappers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index c3dddd7b3f..9b9787c2b9 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -22,9 +22,12 @@ def __init__(self, env, ranges=RANGES_5ZONE): def step(self, action): observation, reward, done, info = self.env.step(action) - normalized_obs = self.observation(observation) - if self.flag_discrete: + # Eliminate day,month, hour from observation + normalized_obs = self.observation(observation[:-3]) + if self.flag_discrete and type(action) == int: action_ = self.action_mapping[action] + else: + action_ = action self.logger.log_step_normalize(timestep=info['timestep'], date=[info['month'], info['day'], info['hour']], From b6401180c6ed8a1d6b1f0e9d4eb18b7bfd7a5d73 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Mon, 7 Jun 2021 15:51:31 +0200 Subject: [PATCH 17/24] Fixed another bug about observation shape --- energym/utils/wrappers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index 9b9787c2b9..e240ce0860 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -22,16 +22,17 @@ def __init__(self, env, ranges=RANGES_5ZONE): def step(self, action): observation, reward, done, info = 
self.env.step(action) - # Eliminate day,month, hour from observation - normalized_obs = self.observation(observation[:-3]) + + normalized_obs = self.observation(observation) if self.flag_discrete and type(action) == int: action_ = self.action_mapping[action] else: action_ = action + # Eliminate day,month, hour from observation self.logger.log_step_normalize(timestep=info['timestep'], date=[info['month'], info['day'], info['hour']], - observation=normalized_obs, + observation=normalized_obs[:-3], action=action_, simulation_time=info['time_elapsed'], reward=reward, From 71c35000536d5c1ce8fc99791733eb07a6bbd5f4 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 8 Jun 2021 11:48:06 +0200 Subject: [PATCH 18/24] Callback registers the clipped action with the environment action space instead of the original network output --- energym/utils/callbacks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index a65e5f36d1..90e8329903 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -67,7 +67,8 @@ def _on_step(self) -> bool: # ACTION variables = self.training_env.get_attr('variables')[0]['action'] try: - action = self.locals['actions'][-1] + # action = self.locals['actions'][-1] + action = self.locals['clipped_actions'][-1] except KeyError: try: action = self.locals['action'][-1] From 5eac30a837946946ea646341ea30b4cd7ac73ed3 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Tue, 8 Jun 2021 11:49:04 +0200 Subject: [PATCH 19/24] Fixed wrapper bug with action type --- energym/utils/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/energym/utils/wrappers.py b/energym/utils/wrappers.py index e240ce0860..8fea97d3a7 100644 --- a/energym/utils/wrappers.py +++ b/energym/utils/wrappers.py @@ -24,7 +24,7 @@ def step(self, action): observation, reward, done, info = self.env.step(action) normalized_obs = self.observation(observation) - if self.flag_discrete and type(action) == int: + if self.flag_discrete and np.issubdtype(type(action), np.integer): action_ = self.action_mapping[action] else: action_ = action From 6a8b31c0f5b3aeff53eff757cd33ee72725b539a Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 9 Jun 2021 11:01:36 +0200 Subject: [PATCH 20/24] Fix #43 - gym action space always defined in (-1,1) and setpoint range transformation for simulation implemented --- energym/envs/eplus_env.py | 25 ++++++++++++++++++++----- energym/utils/common.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/energym/envs/eplus_env.py b/energym/envs/eplus_env.py index 12a6e53f91..797814d23d 100644 --- a/energym/envs/eplus_env.py +++ b/energym/envs/eplus_env.py @@ -15,9 +15,10 @@ from opyplus import Epm, WeatherData -from ..utils.common import get_current_time_info, parse_variables, create_variable_weather, parse_observation_action_space, CSVLogger +from ..utils.common import get_current_time_info, parse_variables, create_variable_weather, parse_observation_action_space, setpoints_transform, CSVLogger from ..simulators import EnergyPlus from ..utils.rewards import SimpleReward +from pprint import pprint class EplusEnv(gym.Env): @@ -92,13 +93,24 @@ def __init__( # Action space self.flag_discrete = discrete_actions + + # Discrete if self.flag_discrete: self.action_mapping = discrete_action_def self.action_space = gym.spaces.Discrete(len(discrete_action_def)) + # Continuous else: + # Defining action setpoint ranges (one per action variable) + self.action_setpoints = [] + for i in
range(len(self.variables['action'])): + # action_variable --> [low,up] + self.action_setpoints.append([ + continuous_action_def[0][i], continuous_action_def[1][i]]) + self.action_space = gym.spaces.Box( - low=np.array(continuous_action_def[0]), - high=np.array(continuous_action_def[1]), + # continuous_action_def[2] --> shape + low=np.repeat(-1, continuous_action_def[2][0]), + high=np.repeat(1, continuous_action_def[2][0]), dtype=continuous_action_def[3] ) @@ -153,7 +165,9 @@ def step(self, action): print("ERROR: ", type(action)) action_ = list(setpoints) else: - action_ = list(action) + # transform gym action into simulation setpoint values + action_ = setpoints_transform( + action, self.action_space, self.action_setpoints) # Send action to the simulator self.simulator.logger_main.debug(action_) @@ -190,7 +204,8 @@ def step(self, action): 'total_power_no_units': terms['reward_energy'], 'comfort_penalty': terms['reward_comfort'], 'temperatures': temp_values, - 'out_temperature': obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'] + 'out_temperature': obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'], + 'action_': action_ } # Record action and new observation in simulator's csv diff --git a/energym/utils/common.py b/energym/utils/common.py index b3c78744cd..58e00bec77 100644 --- a/energym/utils/common.py +++ b/energym/utils/common.py @@ -7,6 +7,7 @@ from pydoc import locate import csv import pandas as pd +import gym from datetime import datetime, timedelta @@ -256,6 +257,36 @@ def ranges_getter(output_path, last_result=None): return result +def setpoints_transform(action, action_space: gym.spaces.Box, setpoints_space): + """Given an action within the gym action_space, convert it into an action within the setpoints_space (Energym simulation). + + Args: + action (list): Action of a step in the gym simulation. + action_space (gym.spaces.Box): Gym action space. + setpoints_space (list): Energym simulation action space. + + Returns: + list: Action transformed into the simulation action space. + """ + + action_ = [] + + for i, value in enumerate(action): + if action_space.low[i] <= value <= action_space.high[i]: + a_max_min = action_space.high[i] - \ + action_space.low[i] + sp_max_min = setpoints_space[i][1] - \ + setpoints_space[i][0] + + action_.append( + setpoints_space[i][0] + (value - action_space.low[i]) * sp_max_min/a_max_min) + else: + # If the action is already outside the action_space, it does not need transformation + action_.append(value) + + return action_ + + class Logger(): """ Energym terminal logger for simulation executions.
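The heart of this patch is the linear rescaling done by setpoints_transform: any action component that lies inside the gym bounds (-1, 1) is mapped into its simulation setpoint interval, while components already outside those bounds are passed through unchanged. Below is a self-contained sketch of that same mapping with a couple of spot checks; the function name rescale_action and the two heating/cooling intervals are illustrative assumptions, not values read from the repository configuration.

import numpy as np
import gym


def rescale_action(action, action_space: gym.spaces.Box, setpoints_space):
    """Map a gym action in (-1, 1) onto per-variable setpoint ranges."""
    action_ = []
    for i, value in enumerate(action):
        low, high = action_space.low[i], action_space.high[i]
        sp_low, sp_high = setpoints_space[i]
        if low <= value <= high:
            # linear interpolation between the gym range and the setpoint range
            action_.append(sp_low + (value - low) * (sp_high - sp_low) / (high - low))
        else:
            # out-of-range values are passed through untouched
            action_.append(value)
    return action_


space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
setpoints = [[15.0, 22.5], [22.5, 30.0]]              # example [heating, cooling] ranges
print(rescale_action([-1.0, 1.0], space, setpoints))  # [15.0, 30.0]
print(rescale_action([0.0, 0.0], space, setpoints))   # [18.75, 26.25]

Fixing the agent-facing space to (-1, 1) keeps the network output range identical across algorithms, while the environment stays responsible for translating it into EnergyPlus setpoints.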
From 94f6ff54fa3e5ce67fdf1276443d3167d85e6260 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 9 Jun 2021 11:02:34 +0200 Subject: [PATCH 21/24] Modified tests for rule based controller according to new action space --- tests/conftest.py | 6 ------ tests/test_controller.py | 30 +++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 984de6cb25..33fe812c09 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ from energym.envs.eplus_env import EplusEnv import energym.utils.rewards as R from energym.utils.wrappers import NormalizeObservation, MultiObsWrapper -from energym.utils.controllers import RuleBasedController from opyplus import Epm, WeatherData import os @@ -99,11 +98,6 @@ def env_wrapper(env_demo): return MultiObsWrapper(env=NormalizeObservation(env=env_demo), n=5) -@pytest.fixture(scope='module') -def rule_controller_agent(env_demo): - return RuleBasedController(env_demo) - - ############### COMMONS ############### diff --git a/tests/test_controller.py b/tests/test_controller.py index 54bd64716d..a2b1523d7b 100644 --- a/tests/test_controller.py +++ b/tests/test_controller.py @@ -1,14 +1,30 @@ -import gym -import numpy as np +import pytest +from energym.utils.controllers import RuleBasedController -def test_rule_based_controller(rule_controller_agent, env_demo): - obs = env_demo.reset() +@pytest.mark.parametrize( + 'env_name', + [ + ( + 'env_demo_discrete' + ), + ( + 'env_demo_continuous' + ), + ] +) +def test_rule_based_controller(env_name, request): + env = request.getfixturevalue(env_name) + rule_based_agent = RuleBasedController(env) + obs = env.reset() + for i in range(3): - action = rule_controller_agent.act(obs) + action = rule_based_agent.act(obs) assert type(action) == tuple for value in action: assert value is not None - obs, reward, done, info = env_demo.step(action) + obs, reward, done, info = env.step(action) + + assert tuple(info['action_']) == action - env_demo.close() + env.close() From 8929aa41666e0fe5620634c0734acbb01dcb627e Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 9 Jun 2021 11:10:24 +0200 Subject: [PATCH 22/24] Adapted test_step for new info[action_] item --- tests/test_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_env.py b/tests/test_env.py index ba78ce7b17..eedf9a4bc2 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -21,7 +21,7 @@ def test_step(env_demo): assert type(reward) != None assert not done assert list(info.keys()) == ['timestep', 'time_elapsed', 'day', 'month', 'hour', 'total_power', - 'total_power_no_units', 'comfort_penalty', 'temperatures', 'out_temperature'] + 'total_power_no_units', 'comfort_penalty', 'temperatures', 'out_temperature', 'action_'] assert info['timestep'] == 1 assert info['time_elapsed'] == env_demo.simulator._eplus_run_stepsize * \ info['timestep'] @@ -33,7 +33,7 @@ def test_step(env_demo): assert type(reward) != None assert not done assert list(info.keys()) == ['timestep', 'time_elapsed', 'day', 'month', 'hour', 'total_power', - 'total_power_no_units', 'comfort_penalty', 'temperatures', 'out_temperature'] + 'total_power_no_units', 'comfort_penalty', 'temperatures', 'out_temperature', 'action_'] assert info['timestep'] == 2 assert info['time_elapsed'] == env_demo.simulator._eplus_run_stepsize * \ info['timestep'] From 6f9c6d0e1f6d5fe1eace0d1b43e951b61178d8ca Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 9 Jun 2021 12:21:56 +0200 Subject: [PATCH 23/24] Added action 
simulation logging for OnPolicyAlgorithm --- energym/utils/callbacks.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/energym/utils/callbacks.py b/energym/utils/callbacks.py index 90e8329903..55a36760a7 100644 --- a/energym/utils/callbacks.py +++ b/energym/utils/callbacks.py @@ -46,6 +46,8 @@ def _on_training_start(self): raise KeyError def _on_step(self) -> bool: + info = self.locals['infos'][-1] + # OBSERVATION variables = self.training_env.get_attr('variables')[0]['observation'] # log normalized and original values @@ -66,9 +68,12 @@ def _on_step(self) -> bool: # ACTION variables = self.training_env.get_attr('variables')[0]['action'] + action_ = None try: - # action = self.locals['actions'][-1] + # network output clipped with gym action space action = self.locals['clipped_actions'][-1] + # energym action received within its own setpoints range + action_ = info['action_'] except KeyError: try: action = self.locals['action'][-1] @@ -80,9 +85,11 @@ def _on_step(self) -> bool: for i, variable in enumerate(variables): self.record( 'action/'+variable, action[i]) + if action_ is not None: + self.record( + 'action_simulation/'+variable, action_[i]) # Store episode data - info = self.locals['infos'][-1] try: self.ep_rewards.append(self.locals['rewards'][-1]) except KeyError: From 7ea92acfc2c7f2e5a80a919eb2eb5f32e0c79242 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 9 Jun 2021 12:22:24 +0200 Subject: [PATCH 24/24] V 1.0.0 energym --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3f951c312a..bd240ae73a 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ reqs = f.read().splitlines() setup(name='energym', - version='0.3.0', + version='1.0.0', install_requires=reqs, include_package_data=True, extras_require={