diff --git a/grid2op/Environment.py b/grid2op/Environment.py index 903605bea..7a10ac333 100644 --- a/grid2op/Environment.py +++ b/grid2op/Environment.py @@ -206,7 +206,8 @@ def __init__(self, observationClass=CompleteObservation, rewardClass=FlatReward, legalActClass=AllwaysLegal, - epsilon_poly=1e-2): + epsilon_poly=1e-2, + tol_poly=1e-6): """ Initialize the environment. See the descirption of :class:`grid2op.Environment.Environment` for more information. @@ -233,6 +234,7 @@ def __init__(self, self._time_powerflow = 0 self._time_extract_obs = 0 self._epsilon_poly = epsilon_poly + self._tol_poly = tol_poly # define logger self.logger = None @@ -652,7 +654,7 @@ def _aux_redisp(self, redisp_act, target_p, avail_gen, previous_redisp): except_ = InvalidRedispatching("Impossible to perform this redispatching. Maximum ramp (or pmax) for " "available generators is not enough to absord " "{}MW, max possible is {}MW".format(val_sum, max_disp)) - elif np.abs(val_sum) <= 1e-6: + elif np.abs(val_sum) <= self._tol_poly: # i don't need to modify anything so i should be good new_redisp = 0.0 * redisp_act else: @@ -847,8 +849,8 @@ def _make_redisp_0sum(self, action, new_p): # get the target redispatching (cumulation starting from the first element of the scenario) self.target_dispatch += redisp_act_orig - if np.abs(np.sum(self.actual_dispatch)) >= 1e-6 or \ - np.sum(np.abs(self.actual_dispatch - self.target_dispatch)) >= 1e-6: + if np.abs(np.sum(self.actual_dispatch)) >= self._tol_poly or \ + np.sum(np.abs(self.actual_dispatch - self.target_dispatch)) >= self._tol_poly: # make sure the redispatching action is zero sum new_redisp, except_ = self._get_redisp_zero_sum(self.target_dispatch, self.gen_activeprod_t, @@ -1280,6 +1282,7 @@ def get_kwargs(self): res["rewardClass"] = self.rewardClass res["legalActClass"] = self.legalActClass res["epsilon_poly"] = self._epsilon_poly + res["tol_poly"] = self._tol_poly return res def get_params_for_runner(self): diff --git a/grid2op/data/test_PandaPower/prods_charac.csv b/grid2op/data/test_PandaPower/prods_charac.csv index e34e749d5..099c89548 100644 --- a/grid2op/data/test_PandaPower/prods_charac.csv +++ b/grid2op/data/test_PandaPower/prods_charac.csv @@ -1,6 +1,6 @@ Pmax,Pmin,name,type,bus,max_ramp_up,max_ramp_down,min_up_time,min_down_time,marginal_cost,shut_down_cost,start_cost,x,y,V -170.,0.,gen_0_4,thermal,170.,170.,170,0,0,10.0,0.,0.0,0,199,106.0 -60.,0.,gen_2_1,nuclear,3,60.,60,0.0,1.0,0.0,0,0.0,180,10,104.5 -100.,0.,gen_5_2,nuclear,3,100.,100.,0.0,0.0,0.0,0,0.0,646,10,101.0 -100.,0.,gen_7_3,nuclear,3,100.,100.,0.0,0.0,0.0,0,0.0,216,334,107.0 -40.,0.,gen_1_0,solar,3,40.,40.,0.0,0.0,0.0,0,0.0,718,280,109.0 +190.,0.,gen_0_4,thermal,170.,190.,190.,0,0,10.0,0.,0.0,0,199,106.0 +80.,0.,gen_2_1,nuclear,3,80.,80.,0.0,1.0,0.0,0,0.0,180,10,104.5 +120.,0.,gen_5_2,nuclear,3,120.,120.,0.0,0.0,0.0,0,0.0,646,10,101.0 +120.,0.,gen_7_3,nuclear,3,120.,120.,0.0,0.0,0.0,0,0.0,216,334,107.0 +50.,0.,gen_1_0,solar,3,50.,50.,0.0,0.0,0.0,0,0.0,718,280,109.0 diff --git a/grid2op/tests/test_Environment.py b/grid2op/tests/test_Environment.py index 33fa1e1a7..91e14abb5 100644 --- a/grid2op/tests/test_Environment.py +++ b/grid2op/tests/test_Environment.py @@ -172,7 +172,7 @@ def test_reward(self): cp.disable() cp.print_stats(sort="tottime") assert i == 287, "Wrong number of timesteps" - assert np.abs(cum_reward - 5739.929117641016) <= self.tol_one, "Wrong reward" + assert np.abs(cum_reward - 5739.92911) <= self.tol_one, "Wrong reward" class TestIllegalAmbiguous(unittest.TestCase): @@ -184,7 +184,7 @@ class TestIllegalAmbiguous(unittest.TestCase): def setUp(self): # powergrid self.tolvect = 1e-2 - self.tol_one = 1e-5 + self.tol_one = 1e-4 self.env = make("case5_example") def compare_vect(self, pred, true): diff --git a/grid2op/tests/test_RedispatchEnv.py b/grid2op/tests/test_RedispatchEnv.py index 68db4f4ba..ffc4ef155 100644 --- a/grid2op/tests/test_RedispatchEnv.py +++ b/grid2op/tests/test_RedispatchEnv.py @@ -110,7 +110,7 @@ def test_basic_redispatch_act(self): act = self.env.action_space({"redispatch": [2, 5]}) obs, reward, done, info = self.env.step(act) assert np.abs(np.sum(self.env.actual_dispatch)) <= self.tol_one - th_dispatch = np.array([0., -1.26379812, 5., 0., -3.73620188]) + th_dispatch = np.array([0., -1.44301856, 5., 0., -3.55698144]) assert self.compare_vect(self.env.actual_dispatch, th_dispatch) target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env.actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus @@ -124,10 +124,10 @@ def test_basic_redispatch_act(self): def test_redispatch_act_above_pmax(self): # in this test, the asked redispatching for generator 2 would make it above pmax, so the environment # need to "cut" it automatically, without invalidating the action - act = self.env.action_space({"redispatch": [2, 20]}) + act = self.env.action_space({"redispatch": [2, 60]}) obs, reward, done, info = self.env.step(act) assert np.abs(np.sum(self.env.actual_dispatch)) <= self.tol_one - th_dispatch = np.array([0., -4.64121498, 20., 0., -15.35878502]) + th_dispatch = np.array([0., -10.46650072, 50.89066718, 0., -40.42416646]) assert self.compare_vect(self.env.actual_dispatch, th_dispatch) target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env.actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus @@ -219,7 +219,7 @@ def test_redispacth_twice_same(self): obs, reward, done, info = self.env.step(act) assert np.all(obs.target_dispatch == np.array([ 0., 0., 5., 0., 0.])) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - assert self.compare_vect(obs.actual_dispatch, np.array([0., -1.26379812, 5., 0., -3.73620188])) + assert self.compare_vect(obs.actual_dispatch, np.array([ 0., -1.44301856, 5., 0., -3.55698144])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -227,7 +227,7 @@ def test_redispacth_twice_same(self): obs, reward, done, info = self.env.step(act) assert np.all(obs.target_dispatch == np.array([ 0., 0., 10., 0., 0.])) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - assert self.compare_vect(obs.actual_dispatch, np.array([0., -2.45340249, 10., 0., -7.54659751])) + assert self.compare_vect(obs.actual_dispatch, np.array([0., -2.81339987, 10., 0., -7.18660013])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -236,19 +236,19 @@ def test_redispacth_secondabovepmax(self): obs, reward, done, info = self.env.step(act) assert np.all(obs.target_dispatch == np.array([0., 0., 20., 0., 0.])) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - assert self.compare_vect(obs.actual_dispatch, np.array([0., -4.64121498, 20., 0., -15.35878502])) + assert self.compare_vect(obs.actual_dispatch, np.array([0., -5.34776078, 20., 0., -14.65223922])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) - act = self.env.action_space({"redispatch": [(2, 20.)]}) + act = self.env.action_space({"redispatch": [(2, 40.)]}) obs, reward, done, info = self.env.step(act) - assert np.all(obs.target_dispatch == np.array([0., 0., 40., 0., 0.])) - assert self.compare_vect(obs.actual_dispatch, np.array([0., -5.77603921, 30.39084553, 0., -24.61480632])) + assert np.all(obs.target_dispatch == np.array([0., 0., 60., 0., 0.])) + assert self.compare_vect(obs.actual_dispatch, np.array([0., -10.31164036, 50.39070301, 0., -40.07906265])) assert np.all(obs.prod_p[:-1] <= self.env.gen_pmax[:-1]) assert np.all(obs.prod_p[:-1] >= self.env.gen_pmin[:-1]) -# TODO test that if i try to redispatched a turned off generator it breaks everything +# # TODO test that if i try to redispatched a turned off generator it breaks everything class TestLoadingBackendPandaPower(unittest.TestCase): def setUp(self): # powergrid