diff --git a/examples/atari/reproduction/a3c/train_a3c.py b/examples/atari/reproduction/a3c/train_a3c.py index f4dc506eb..410a1af8c 100644 --- a/examples/atari/reproduction/a3c/train_a3c.py +++ b/examples/atari/reproduction/a3c/train_a3c.py @@ -16,7 +16,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument("--processes", type=int, default=16) parser.add_argument("--env", type=str, default="BreakoutNoFrameskip-v4") @@ -176,7 +175,6 @@ def phi(x): ) ) else: - # Linearly decay the learning rate to zero def lr_setter(env, agent, value): for pg in agent.optimizer.param_groups: diff --git a/examples/atari/train_acer_ale.py b/examples/atari/train_acer_ale.py index 686853ad9..d95cb1cca 100644 --- a/examples/atari/train_acer_ale.py +++ b/examples/atari/train_acer_ale.py @@ -19,7 +19,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument("processes", type=int) parser.add_argument("--env", type=str, default="BreakoutNoFrameskip-v4") @@ -185,7 +184,6 @@ def make_env(process_idx, test): ) ) else: - # Linearly decay the learning rate to zero def lr_setter(env, agent, value): for pg in agent.optimizer.param_groups: diff --git a/examples/atlas/train_soft_actor_critic_atlas.py b/examples/atlas/train_soft_actor_critic_atlas.py index 8b1a32505..76d147279 100644 --- a/examples/atlas/train_soft_actor_critic_atlas.py +++ b/examples/atlas/train_soft_actor_critic_atlas.py @@ -45,7 +45,6 @@ def make_env(args, seed, test): def main(): - parser = argparse.ArgumentParser() parser.add_argument( "--outdir", diff --git a/examples/gym/train_dqn_gym.py b/examples/gym/train_dqn_gym.py index 60b5ec932..b4a5c22a2 100644 --- a/examples/gym/train_dqn_gym.py +++ b/examples/gym/train_dqn_gym.py @@ -210,7 +210,6 @@ def make_env(idx=0, test=False): ) elif not args.actor_learner: - print( "WARNING: Since https://github.com/pfnet/pfrl/pull/112 we have started" " setting `eval_during_episode=True` in this script, which affects the" diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index 2f764fdf2..41932d354 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -22,7 +22,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument( "--outdir", diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index bdfdd0e21..577ca881a 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -21,7 +21,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument( "--outdir", diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index f57bec25e..021388051 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -19,7 +19,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument( "--outdir", diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index c8a7715fe..f11a0a331 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -16,7 +16,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. Set to -1 to use CPUs only." @@ -215,7 +214,6 @@ def ortho_init(layer, gain): with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: - pfrl.experiments.train_agent_with_evaluation( agent=agent, env=env, diff --git a/pfrl/agents/a2c.py b/pfrl/agents/a2c.py index 615189a03..d0768f653 100644 --- a/pfrl/agents/a2c.py +++ b/pfrl/agents/a2c.py @@ -71,7 +71,6 @@ def __init__( average_value_decay=0.999, batch_states=batch_states, ): - self.model = model if gpu is not None and gpu >= 0: assert torch.cuda.is_available() diff --git a/pfrl/agents/a3c.py b/pfrl/agents/a3c.py index 0d620b786..90fb05e26 100644 --- a/pfrl/agents/a3c.py +++ b/pfrl/agents/a3c.py @@ -64,7 +64,6 @@ def __init__( average_value_decay=0.999, batch_states=batch_states, ): - # Globally shared model self.shared_model = model @@ -241,7 +240,6 @@ def observe(self, obs, reward, done, reset): self._observe_eval(obs, reward, done, reset) def _act_train(self, obs): - self.past_obs[self.t] = obs with torch.no_grad(): diff --git a/pfrl/agents/acer.py b/pfrl/agents/acer.py index 332946e13..5654ed62b 100644 --- a/pfrl/agents/acer.py +++ b/pfrl/agents/acer.py @@ -332,7 +332,6 @@ def __init__( average_kl_decay=0.999, logger=None, ): - # Globally shared model self.shared_model = model @@ -472,7 +471,6 @@ def compute_loss( action_distribs_mu, avg_action_distribs, ): - assert np.isscalar(R) pi_loss = 0 Q_loss = 0 @@ -566,7 +564,6 @@ def update( action_distribs_mu, avg_action_distribs, ): - assert np.isscalar(R) self.assert_shared_memory() @@ -595,7 +592,6 @@ def update( self.sync_parameters() def update_from_replay(self): - if self.replay_buffer is None: return @@ -715,7 +711,6 @@ def observe(self, obs, reward, done, reset): self._observe_eval(obs, reward, done, reset) def _act_train(self, obs): - statevar = batch_states([obs], self.device, self.phi) if self.recurrent: diff --git a/pfrl/agents/al.py b/pfrl/agents/al.py index a095169a9..164caa957 100644 --- a/pfrl/agents/al.py +++ b/pfrl/agents/al.py @@ -21,7 +21,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def _compute_y_and_t(self, exp_batch): - batch_state = exp_batch["state"] batch_size = len(exp_batch["reward"]) diff --git a/pfrl/agents/ddpg.py b/pfrl/agents/ddpg.py index 9d2d15589..08c0748da 100644 --- a/pfrl/agents/ddpg.py +++ b/pfrl/agents/ddpg.py @@ -81,7 +81,6 @@ def __init__( batch_states=batch_states, burnin_action_func=None, ): - self.model = nn.ModuleList([policy, q_func]) if gpu is not None and gpu >= 0: assert torch.cuda.is_available() @@ -223,7 +222,6 @@ def update_from_episodes(self, episodes, errors_out=None): batches.append(batch) with self.model.state_reset(), self.target_model.state_reset(): - # Since the target model is evaluated one-step ahead, # its internal states need to be updated self.target_q_function.update_state( @@ -238,7 +236,6 @@ def update_from_episodes(self, episodes, errors_out=None): self.critic_optimizer.update(lambda: critic_loss / max_epi_len) with self.model.state_reset(): - # Update actor through time actor_loss = 0 for batch in batches: diff --git a/pfrl/agents/double_dqn.py b/pfrl/agents/double_dqn.py index 229379f69..6828b0acb 100644 --- a/pfrl/agents/double_dqn.py +++ b/pfrl/agents/double_dqn.py @@ -10,7 +10,6 @@ class DoubleDQN(dqn.DQN): """ def _compute_target_values(self, exp_batch): - batch_next_state = exp_batch["next_state"] with evaluating(self.model): diff --git a/pfrl/agents/double_pal.py b/pfrl/agents/double_pal.py index be6691a56..7cb5cfa9f 100644 --- a/pfrl/agents/double_pal.py +++ b/pfrl/agents/double_pal.py @@ -6,7 +6,6 @@ class DoublePAL(pal.PAL): def _compute_y_and_t(self, exp_batch): - batch_state = exp_batch["state"] batch_size = len(exp_batch["reward"]) diff --git a/pfrl/agents/dpp.py b/pfrl/agents/dpp.py index 792df64b9..485e05885 100644 --- a/pfrl/agents/dpp.py +++ b/pfrl/agents/dpp.py @@ -17,7 +17,6 @@ def _l_operator(self, qout): raise NotImplementedError() def _compute_target_values(self, exp_batch): - batch_next_state = exp_batch["next_state"] if self.recurrent: @@ -38,7 +37,6 @@ def _compute_target_values(self, exp_batch): ) def _compute_y_and_t(self, exp_batch): - batch_state = exp_batch["state"] batch_size = len(exp_batch["reward"]) diff --git a/pfrl/agents/dqn.py b/pfrl/agents/dqn.py index 740fcc2fa..8f61f08d2 100644 --- a/pfrl/agents/dqn.py +++ b/pfrl/agents/dqn.py @@ -3,7 +3,9 @@ import ctypes import multiprocessing as mp import multiprocessing.synchronize +import os import time +import typing from logging import Logger, getLogger from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple @@ -33,10 +35,11 @@ recurrent_state_as_numpy, ) +from pdb import set_trace def _mean_or_nan(xs: Sequence[float]) -> float: """Return its mean a non-empty sequence, numpy.nan for a empty one.""" - return np.mean(xs) if xs else np.nan + return typing.cast(float, np.mean(xs)) if xs else np.nan def compute_value_loss( @@ -485,6 +488,13 @@ def _evaluate_model_and_update_recurrent_states( batch_av = self.model(batch_xs) return batch_av + def compute_q(self, batch_obs: Sequence[Any], batch_action: Sequence[Any]) -> Sequence[Any]: + with torch.no_grad(), evaluating(self.model): + batch_av = self._evaluate_model_and_update_recurrent_states(batch_obs) + q_values = batch_av.q_values + batch_q_values = q_values[torch.arange(q_values.shape[0]), batch_action] + return batch_q_values + def batch_act(self, batch_obs: Sequence[Any]) -> Sequence[Any]: with torch.no_grad(), evaluating(self.model): batch_av = self._evaluate_model_and_update_recurrent_states(batch_obs) @@ -511,7 +521,6 @@ def _batch_observe_train( batch_done: Sequence[bool], batch_reset: Sequence[bool], ) -> None: - for i in range(len(batch_obs)): self.t += 1 self._cumulative_steps += 1 @@ -790,6 +799,24 @@ def stop_episode(self) -> None: if self.recurrent: self.test_recurrent_states = None + def save_snapshot(self, dirname: str) -> None: + self.save(dirname) + torch.save(self.t, os.path.join(dirname, "t.pt")) + torch.save(self.optim_t, os.path.join(dirname, "optim_t.pt")) + torch.save( + self._cumulative_steps, os.path.join(dirname, "_cumulative_steps.pt") + ) + self.replay_buffer.save(os.path.join(dirname, "replay_buffer.pkl")) + + def load_snapshot(self, dirname: str) -> None: + self.load(dirname) + self.t = torch.load(os.path.join(dirname, "t.pt")) + self.optim_t = torch.load(os.path.join(dirname, "optim_t.pt")) + self._cumulative_steps = torch.load( + os.path.join(dirname, "_cumulative_steps.pt") + ) + self.replay_buffer.load(os.path.join(dirname, "replay_buffer.pkl")) + def get_statistics(self): return [ ("average_q", _mean_or_nan(self.q_record)), diff --git a/pfrl/agents/pal.py b/pfrl/agents/pal.py index 1820588e6..ed62cb7b4 100644 --- a/pfrl/agents/pal.py +++ b/pfrl/agents/pal.py @@ -21,7 +21,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def _compute_y_and_t(self, exp_batch): - batch_state = exp_batch["state"] batch_size = len(exp_batch["reward"]) diff --git a/pfrl/agents/ppo.py b/pfrl/agents/ppo.py index dae3c31a4..03b70c671 100644 --- a/pfrl/agents/ppo.py +++ b/pfrl/agents/ppo.py @@ -115,7 +115,6 @@ def _add_log_prob_and_value_to_episodes( obs_normalizer, device, ): - dataset = list(itertools.chain.from_iterable(episodes)) # Compute v_pred and next_v_pred @@ -533,7 +532,6 @@ def _update(self, dataset): self.n_updates += 1 def _update_once_recurrent(self, episodes, mean_advs, std_advs): - assert std_advs is None or std_advs > 0 device = self.device @@ -636,7 +634,6 @@ def _update_recurrent(self, dataset): def _lossfun( self, entropy, vs_pred, log_probs, vs_pred_old, log_probs_old, advs, vs_teacher ): - prob_ratio = torch.exp(log_probs - log_probs_old) loss_policy = -torch.mean( diff --git a/pfrl/agents/reinforce.py b/pfrl/agents/reinforce.py index 155ddcf15..c34baeb64 100644 --- a/pfrl/agents/reinforce.py +++ b/pfrl/agents/reinforce.py @@ -57,7 +57,6 @@ def __init__( max_grad_norm=None, logger=None, ): - self.model = model if gpu is not None and gpu >= 0: assert torch.cuda.is_available() @@ -103,7 +102,6 @@ def observe(self, obs, reward, done, reset): self._observe_eval(obs, reward, done, reset) def _act_train(self, obs): - batch_obs = self.batch_states([obs], self.device, self.phi) if self.recurrent: action_distrib, self.train_recurrent_states = one_step_forward( diff --git a/pfrl/agents/soft_actor_critic.py b/pfrl/agents/soft_actor_critic.py index a3048c93d..75e8ce98a 100644 --- a/pfrl/agents/soft_actor_critic.py +++ b/pfrl/agents/soft_actor_critic.py @@ -119,7 +119,6 @@ def __init__( temperature_optimizer_lr=None, act_deterministically=True, ): - self.policy = policy self.q_func1 = q_func1 self.q_func2 = q_func2 diff --git a/pfrl/agents/td3.py b/pfrl/agents/td3.py index 2596494e6..dc913f56d 100644 --- a/pfrl/agents/td3.py +++ b/pfrl/agents/td3.py @@ -101,7 +101,6 @@ def __init__( policy_update_delay=2, target_policy_smoothing_func=default_target_policy_smoothing_func, ): - self.policy = policy self.q_func1 = q_func1 self.q_func2 = q_func2 diff --git a/pfrl/agents/trpo.py b/pfrl/agents/trpo.py index 31d172cb4..613b6258e 100644 --- a/pfrl/agents/trpo.py +++ b/pfrl/agents/trpo.py @@ -193,7 +193,6 @@ def __init__( policy_step_size_stats_window=100, logger=getLogger(__name__), ): - self.policy = policy self.vf = vf self.vf_optimizer = vf_optimizer @@ -335,7 +334,6 @@ def _update_recurrent(self, dataset): self._update_vf_recurrent(dataset) def _update_vf_recurrent(self, dataset): - for epoch in range(self.vf_epochs): random.shuffle(dataset) for ( @@ -346,7 +344,6 @@ def _update_vf_recurrent(self, dataset): self._update_vf_once_recurrent(minibatch) def _update_vf_once_recurrent(self, episodes): - # Sort episodes desc by length for pack_sequence episodes = sorted(episodes, key=len, reverse=True) diff --git a/pfrl/experiments/train_agent.py b/pfrl/experiments/train_agent.py index a2e903f6e..81321b9ac 100644 --- a/pfrl/experiments/train_agent.py +++ b/pfrl/experiments/train_agent.py @@ -35,7 +35,6 @@ def train_agent( eval_during_episode=False, logger=None, ): - logger = logger or logging.getLogger(__name__) episode_r = 0 @@ -52,7 +51,6 @@ def train_agent( episode_len = 0 try: while t < steps: - # a_t action = agent.act(obs) # o_{t+1}, r_{t+1} diff --git a/pfrl/experiments/train_agent_async.py b/pfrl/experiments/train_agent_async.py index 02b34c908..ff19fa0de 100644 --- a/pfrl/experiments/train_agent_async.py +++ b/pfrl/experiments/train_agent_async.py @@ -41,7 +41,6 @@ def train_loop( logger=None, global_step_hooks=[], ): - logger = logger or logging.getLogger(__name__) if eval_env is None: @@ -55,7 +54,6 @@ def save_model(): logger.info("Saved the current model to %s", dirname) try: - episode_r = 0 global_t = 0 local_t = 0 @@ -65,7 +63,6 @@ def save_model(): successful = False while True: - # a_t a = agent.act(obs) # o_{t+1}, r_{t+1} diff --git a/pfrl/nn/noisy_linear.py b/pfrl/nn/noisy_linear.py index 7fa9aa0d4..df9fa9578 100644 --- a/pfrl/nn/noisy_linear.py +++ b/pfrl/nn/noisy_linear.py @@ -13,7 +13,6 @@ def init_lecun_uniform(tensor, scale=1.0): def init_variance_scaling_constant(tensor, scale=1.0): - if tensor.ndim == 1: s = scale / np.sqrt(tensor.shape[0]) else: diff --git a/pfrl/q_functions/state_action_q_functions.py b/pfrl/q_functions/state_action_q_functions.py index ae88e16af..be540f1bc 100644 --- a/pfrl/q_functions/state_action_q_functions.py +++ b/pfrl/q_functions/state_action_q_functions.py @@ -105,7 +105,7 @@ def __init__( num_layers=1, input_size=n_hidden_channels, hidden_size=n_hidden_channels ) self.out = nn.Linear(n_hidden_channels, 1) - for (n, p) in self.lstm.named_parameters(): + for n, p in self.lstm.named_parameters(): if "weight" in n: init_lecun_normal(p) else: diff --git a/pfrl/replay_buffer.py b/pfrl/replay_buffer.py index 5d2edea8e..03522eb73 100644 --- a/pfrl/replay_buffer.py +++ b/pfrl/replay_buffer.py @@ -316,7 +316,6 @@ def __init__( update_interval, episodic_update_len=None, ): - assert batchsize <= replay_start_size self.replay_buffer = replay_buffer self.update_func = update_func diff --git a/pfrl/replay_buffers/episodic.py b/pfrl/replay_buffers/episodic.py index 31e88b0e4..34ab517dd 100644 --- a/pfrl/replay_buffers/episodic.py +++ b/pfrl/replay_buffers/episodic.py @@ -7,7 +7,6 @@ class EpisodicReplayBuffer(AbstractEpisodicReplayBuffer): - # Implements AbstractReplayBuffer.capacity capacity: Optional[int] = None diff --git a/setup.py b/setup.py index 84ca77dba..0dcb7c8d5 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ install_requires = [ 'torch>=1.3.0', - 'gymnasium[all]', + 'gymnasium[atari]', 'numpy>=1.11.0', 'pillow', 'filelock', @@ -16,7 +16,7 @@ ] setup(name='pfrl', - version='0.3.0', + version='0.4.0', description='PFRL, a deep reinforcement learning library', long_description=codecs.open('README.md', 'r', encoding='utf-8').read(), long_description_content_type='text/markdown', diff --git a/tests/agents_tests/basetest_training.py b/tests/agents_tests/basetest_training.py index f7ccdbe03..730141ccc 100644 --- a/tests/agents_tests/basetest_training.py +++ b/tests/agents_tests/basetest_training.py @@ -33,7 +33,6 @@ def make_env_and_successful_return(self, test): raise NotImplementedError() def _test_training(self, gpu, steps=5000, load_model=False, require_success=True): - random_seed.set_random_seed(1) logging.basicConfig(level=logging.DEBUG) @@ -112,7 +111,6 @@ def make_vec_env_and_successful_return(self, test, num_envs=2): def _test_batch_training( self, gpu, steps=5000, load_model=False, require_success=True ): - random_seed.set_random_seed(1) logging.basicConfig(level=logging.DEBUG) @@ -183,7 +181,6 @@ class _TestActorLearnerTrainingMixin(object): """ def _test_actor_learner_training(self, gpu, steps=100000, require_success=True): - logging.basicConfig(level=logging.DEBUG) test_env, successful_return = self.make_env_and_successful_return(test=True) diff --git a/tests/agents_tests/test_a2c.py b/tests/agents_tests/test_a2c.py index a5d9f531b..e1fecf503 100644 --- a/tests/agents_tests/test_a2c.py +++ b/tests/agents_tests/test_a2c.py @@ -48,7 +48,6 @@ def test_abc_fast_gpu(self): self._test_abc(steps=100, require_success=False, gpu=0) def _test_abc(self, steps=1000000, require_success=True, gpu=-1, load_model=False): - env, _ = self.make_env_and_successful_return(test=False, n=self.num_processes) test_env, successful_return = self.make_env_and_successful_return( test=True, n=1 diff --git a/tests/agents_tests/test_a3c.py b/tests/agents_tests/test_a3c.py index 504c99a48..f3113fc53 100644 --- a/tests/agents_tests/test_a3c.py +++ b/tests/agents_tests/test_a3c.py @@ -119,7 +119,6 @@ def _test_abc( steps=100000, require_success=True, ): - nproc = 8 def make_env(process_idx, test): diff --git a/tests/agents_tests/test_acer.py b/tests/agents_tests/test_acer.py index 944412d40..82fefb5d4 100644 --- a/tests/agents_tests/test_acer.py +++ b/tests/agents_tests/test_acer.py @@ -273,7 +273,6 @@ def test_compute_loss_with_kl_constraint_softmax(): def _test_compute_loss_with_kl_constraint(base_policy): - # Train a policy with and without KL constraint against the original # distribution to confirm KL constraint works. @@ -351,7 +350,6 @@ def _test_abc( steps=100000, require_success=True, ): - nproc = 8 def make_env(process_idx, test): diff --git a/tests/agents_tests/test_categorical_dqn.py b/tests/agents_tests/test_categorical_dqn.py index 9a853556e..2555923ca 100644 --- a/tests/agents_tests/test_categorical_dqn.py +++ b/tests/agents_tests/test_categorical_dqn.py @@ -322,7 +322,6 @@ def test_not_weighted(self): assertions.assertAlmostEqual(loss, eltwise_loss, places=5) def test_uniformly_weighted(self): - # Uniform weights of size batch size w1 = np.ones(self.y.shape[0], dtype="f") @@ -344,7 +343,6 @@ def test_uniformly_weighted(self): assertions.assertAlmostEqual(loss_w1, eltwise_loss, places=5) def test_randomly_weighted(self): - # Random weights wu = np.random.uniform(low=0, high=2, size=self.y.shape[0]).astype("f") diff --git a/tests/agents_tests/test_ppo.py b/tests/agents_tests/test_ppo.py index cf2d55430..4efb5aed7 100644 --- a/tests/agents_tests/test_ppo.py +++ b/tests/agents_tests/test_ppo.py @@ -313,7 +313,6 @@ def test_abc_batch_fast_gpu(self): self._test_abc_batch(steps=100, require_success=False, gpu=0) def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False): - env, _ = self.make_env_and_successful_return(test=False) test_env, successful_return = self.make_env_and_successful_return(test=True) agent = self.make_agent(env, gpu) @@ -356,7 +355,6 @@ def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False def _test_abc_batch( self, steps=100000, require_success=True, gpu=-1, load_model=False, num_envs=4 ): - env, _ = self.make_vec_env_and_successful_return(test=False, num_envs=num_envs) test_env, successful_return = self.make_vec_env_and_successful_return( test=True, num_envs=num_envs diff --git a/tests/agents_tests/test_soft_actor_critic.py b/tests/agents_tests/test_soft_actor_critic.py index a37c5cdf4..df70ed962 100644 --- a/tests/agents_tests/test_soft_actor_critic.py +++ b/tests/agents_tests/test_soft_actor_critic.py @@ -65,7 +65,6 @@ def test_abc_batch_fast_gpu(self): self._test_abc_batch(steps=100, require_success=False, gpu=0) def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False): - env, _ = self.make_env_and_successful_return(test=False) test_env, successful_return = self.make_env_and_successful_return(test=True) @@ -110,7 +109,6 @@ def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False def _test_abc_batch( self, steps=100000, require_success=True, gpu=-1, load_model=False ): - env, _ = self.make_vec_env_and_successful_return(test=False) test_env, successful_return = self.make_vec_env_and_successful_return(test=True) diff --git a/tests/agents_tests/test_td3.py b/tests/agents_tests/test_td3.py index 39f62f194..5f9167b7a 100644 --- a/tests/agents_tests/test_td3.py +++ b/tests/agents_tests/test_td3.py @@ -63,7 +63,6 @@ def test_abc_batch_fast_gpu(self): self._test_abc_batch(steps=100, require_success=False, gpu=0) def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False): - env, _ = self.make_env_and_successful_return(test=False) test_env, successful_return = self.make_env_and_successful_return(test=True) @@ -108,7 +107,6 @@ def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False def _test_abc_batch( self, steps=100000, require_success=True, gpu=-1, load_model=False ): - env, _ = self.make_vec_env_and_successful_return(test=False) test_env, successful_return = self.make_vec_env_and_successful_return(test=True) diff --git a/tests/agents_tests/test_trpo.py b/tests/agents_tests/test_trpo.py index e6a2c0d2f..e65e62a5a 100644 --- a/tests/agents_tests/test_trpo.py +++ b/tests/agents_tests/test_trpo.py @@ -141,7 +141,6 @@ def test_abc_batch_fast_gpu(self): self._test_abc_batch(steps=100, require_success=False, gpu=0) def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False): - env, _ = self.make_env_and_successful_return(test=False) test_env, successful_return = self.make_env_and_successful_return(test=True) @@ -186,7 +185,6 @@ def _test_abc(self, steps=100000, require_success=True, gpu=-1, load_model=False def _test_abc_batch( self, steps=100000, require_success=True, gpu=-1, load_model=False, num_envs=4 ): - env, _ = self.make_vec_env_and_successful_return(test=False, num_envs=num_envs) test_env, successful_return = self.make_vec_env_and_successful_return( test=True, num_envs=num_envs diff --git a/tests/experiments_tests/test_evaluator.py b/tests/experiments_tests/test_evaluator.py index 4f6811ea1..3148edc1e 100644 --- a/tests/experiments_tests/test_evaluator.py +++ b/tests/experiments_tests/test_evaluator.py @@ -13,7 +13,6 @@ @pytest.mark.parametrize("n_steps", [None, 1, 2]) @pytest.mark.parametrize("n_episodes", [None, 1, 2]) def test_evaluator_evaluate_if_necessary(save_best_so_far_agent, n_steps, n_episodes): - outdir = tempfile.mkdtemp() # MagicMock can mock eval_mode while Mock cannot diff --git a/tests/experiments_tests/test_hooks.py b/tests/experiments_tests/test_hooks.py index 792b5511b..809ccb31b 100644 --- a/tests/experiments_tests/test_hooks.py +++ b/tests/experiments_tests/test_hooks.py @@ -7,7 +7,6 @@ class TestLinearInterpolationHook(unittest.TestCase): def test_call(self): - buf = [] def setter(env, agent, value): diff --git a/tests/experiments_tests/test_prepare_output_dir.py b/tests/experiments_tests/test_prepare_output_dir.py index 4e8ee5dc0..1344c6702 100644 --- a/tests/experiments_tests/test_prepare_output_dir.py +++ b/tests/experiments_tests/test_prepare_output_dir.py @@ -20,9 +20,7 @@ def work_dir(dirname): def test_is_under_git_control(): - with tempfile.TemporaryDirectory() as tmp: - # Not under git control with work_dir(tmp): assert not pfrl.experiments.is_under_git_control() @@ -37,7 +35,6 @@ def test_is_under_git_control(): def test_generate_exp_id(): - with tempfile.TemporaryDirectory() as tmp: with work_dir(tmp): subprocess.check_output(["git", "init"]) @@ -71,7 +68,6 @@ def test_generate_exp_id(): ), ) def test_prepare_output_dir(exp_id, git, basedir, argv): - with tempfile.TemporaryDirectory() as tmp: if not exp_id and not git: pytest.skip("Without git it cannot generate experiment id") @@ -80,7 +76,6 @@ def test_prepare_output_dir(exp_id, git, basedir, argv): os.environ["PFRL_TEST_PREPARE_OUTPUT_DIR"] = "T" with work_dir(tmp): - if git: subprocess.call(["git", "init"]) with open("not_utf-8.txt", "wb") as f: diff --git a/tests/experiments_tests/test_train_agent.py b/tests/experiments_tests/test_train_agent.py index 5ba306e88..d60249f57 100644 --- a/tests/experiments_tests/test_train_agent.py +++ b/tests/experiments_tests/test_train_agent.py @@ -9,7 +9,6 @@ class TestTrainAgent(unittest.TestCase): def test(self): - outdir = tempfile.mkdtemp() agent = mock.Mock() @@ -50,7 +49,6 @@ def test(self): self.assertEqual(args[2], i + 1) def test_needs_reset(self): - outdir = tempfile.mkdtemp() agent = mock.Mock() @@ -136,7 +134,6 @@ def __call__( @pytest.mark.parametrize("eval_during_episode", [False, True]) def test_eval_during_episode(eval_during_episode): - outdir = tempfile.mkdtemp() agent = mock.MagicMock() diff --git a/tests/experiments_tests/test_train_agent_async.py b/tests/experiments_tests/test_train_agent_async.py index cc3869acc..8437ffc01 100644 --- a/tests/experiments_tests/test_train_agent_async.py +++ b/tests/experiments_tests/test_train_agent_async.py @@ -13,7 +13,6 @@ @pytest.mark.parametrize("num_envs", [1, 2]) @pytest.mark.parametrize("max_episode_len", [None, 2]) def test_train_agent_async(num_envs, max_episode_len): - steps = 50 outdir = tempfile.mkdtemp() @@ -147,7 +146,6 @@ def __call__( class TestTrainLoop(unittest.TestCase): def test_needs_reset(self): - outdir = tempfile.mkdtemp() agent = mock.Mock() diff --git a/tests/experiments_tests/test_train_agent_batch.py b/tests/experiments_tests/test_train_agent_batch.py index b719cfac3..1e318c05b 100644 --- a/tests/experiments_tests/test_train_agent_batch.py +++ b/tests/experiments_tests/test_train_agent_batch.py @@ -13,7 +13,6 @@ @pytest.mark.parametrize("steps", [5, 6]) @pytest.mark.parametrize("enable_evaluation", [True, False]) def test_train_agent_batch(num_envs, max_episode_len, steps, enable_evaluation): - outdir = tempfile.mkdtemp() agent = mock.Mock() diff --git a/tests/explorers_tests/test_boltzmann.py b/tests/explorers_tests/test_boltzmann.py index f03d9eaee..98a93bc8b 100644 --- a/tests/explorers_tests/test_boltzmann.py +++ b/tests/explorers_tests/test_boltzmann.py @@ -24,7 +24,6 @@ def greedy_action_func(): class TestBoltzmann(unittest.TestCase): def test_boltzmann(self): - # T=1 q_values = torch.from_numpy(np.asarray([[-1, 1, 0]], dtype=np.float32)) action_count = count_actions_selected_by_boltzmann(1, q_values) diff --git a/tests/explorers_tests/test_epsilon_greedy.py b/tests/explorers_tests/test_epsilon_greedy.py index e14f6224d..f4bc0844f 100644 --- a/tests/explorers_tests/test_epsilon_greedy.py +++ b/tests/explorers_tests/test_epsilon_greedy.py @@ -51,7 +51,6 @@ def greedy_action_func(): class TestEpsilonGreedy: def test_linear_decay_epsilon_greedy(self): - random_action_func_count = [0] greedy_action_func_count = [0] @@ -80,7 +79,6 @@ def greedy_action_func(): assert pytest.approx(explorer.epsilon) == 0.1 def test_constant_epsilon_greedy(self): - random_action_func_count = [0] greedy_action_func_count = [0] diff --git a/tests/replay_buffers_test/test_replay_buffer.py b/tests/replay_buffers_test/test_replay_buffer.py index ba7962913..5c2e4a4b1 100644 --- a/tests/replay_buffers_test/test_replay_buffer.py +++ b/tests/replay_buffers_test/test_replay_buffer.py @@ -399,7 +399,6 @@ def test_append_and_sample(self): np.testing.assert_allclose(s4[0][0]["weight"], s4[1][0]["weight"]) def test_normalize_by_max(self): - rbuf = replay_buffers.PrioritizedReplayBuffer( self.capacity, normalize_by_max=self.normalize_by_max, diff --git a/tests/test_agent.py b/tests/test_agent.py index 9f9271148..3804958ea 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -14,7 +14,6 @@ def create_simple_link(): class Parent(pfrl.agent.AttributeSavingMixin, object): - saved_attributes = ("link", "child") def __init__(self): @@ -23,7 +22,6 @@ def __init__(self): class Child(pfrl.agent.AttributeSavingMixin, object): - saved_attributes = ("link",) def __init__(self): @@ -31,7 +29,6 @@ def __init__(self): class Parent2(pfrl.agent.AttributeSavingMixin, object): - saved_attributes = ("child_a", "child_b") def __init__(self, child_a, child_b): diff --git a/tests/utils_tests/test_batch_states.py b/tests/utils_tests/test_batch_states.py index cdfedcd84..5be50d706 100644 --- a/tests/utils_tests/test_batch_states.py +++ b/tests/utils_tests/test_batch_states.py @@ -9,7 +9,6 @@ class TestBatchStates(unittest.TestCase): def _test(self, gpu): - # state: ((2,2)-shaped array, integer, (1,)-shaped array) states = [ (np.arange(4).reshape((2, 2)), 0, np.zeros(1)), diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index f80d89b1e..963011bdd 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -245,7 +245,6 @@ def setup(self, pretrained_type): self.pretrained_type = pretrained_type def _test_load_ddpg(self, gpu): - obs_size = 11 action_size = 3 from pfrl.nn import ConcatObsAndAction @@ -443,7 +442,6 @@ def setup(self, pretrained_type): self.pretrained_type = pretrained_type def _test_load_td3(self, gpu): - obs_size = 11 action_size = 3 diff --git a/tests/wrappers_tests/test_atari_wrappers.py b/tests/wrappers_tests/test_atari_wrappers.py index ec0cbeffc..f0be506e0 100644 --- a/tests/wrappers_tests/test_atari_wrappers.py +++ b/tests/wrappers_tests/test_atari_wrappers.py @@ -15,7 +15,6 @@ @pytest.mark.parametrize("dtype", [np.uint8, np.float32]) @pytest.mark.parametrize("k", [2, 3]) def test_frame_stack(dtype, k): - steps = 10 # Mock env that returns atari-like frames @@ -87,7 +86,6 @@ def dtyped_rand(): @pytest.mark.parametrize("dtype", [np.uint8, np.float32]) def test_scaled_float_frame(dtype): - steps = 10 # Mock env that returns atari-like frames diff --git a/tests/wrappers_tests/test_vector_frame_stack.py b/tests/wrappers_tests/test_vector_frame_stack.py index 6e5919ca1..e739c1ed8 100644 --- a/tests/wrappers_tests/test_vector_frame_stack.py +++ b/tests/wrappers_tests/test_vector_frame_stack.py @@ -14,7 +14,6 @@ class TestVectorEnvWrapper(unittest.TestCase): def test(self): - vec_env = pfrl.envs.SerialVectorEnv([mock.Mock() for _ in range(3)]) wrapped_vec_env = VectorEnvWrapper(vec_env) @@ -28,7 +27,6 @@ def test(self): @pytest.mark.parametrize("num_envs", [1, 3]) @pytest.mark.parametrize("k", [2, 3]) def test_vector_frame_stack(num_envs, k): - steps = 10 # Mock env that returns atari-like frames