Showing 7 changed files with 217 additions and 17 deletions.
@@ -0,0 +1,24 @@
import torch.multiprocessing as mp
from agents.dqn_agent import *


class AsynDQNValueFunction(DQNValueFunction):
    def __init__(self, input_channel: int, action_dim: int, learning_rate: float,
                 gamma: float, step_c: int, model_saving_period: int, device: torch.device, logger: Logger):
        super(AsynDQNValueFunction, self).__init__(input_channel, action_dim, learning_rate,
                                                   gamma, step_c, model_saving_period, device, logger)
        # Move the online and target networks into shared memory so that every
        # worker process reads and updates the same parameters.
        self.value_nn.share_memory()
        self.target_value_nn.share_memory()


class AsynDQNAgent(DQNAgent):
    def __init__(self, input_frame_width: int, input_frame_height: int, action_space,
                 mini_batch_size: int, replay_buffer_size: int, replay_start_size: int,
                 learning_rate: float, step_c: int, model_saving_period: int,
                 gamma: float, training_episodes: int, phi_channel: int, epsilon_max: float, epsilon_min: float,
                 exploration_steps: int, device: torch.device, logger: Logger):
        super(AsynDQNAgent, self).__init__(input_frame_width, input_frame_height, action_space,
                                           mini_batch_size, replay_buffer_size, replay_start_size,
                                           learning_rate, step_c, model_saving_period,
                                           gamma, training_episodes, phi_channel, epsilon_max, epsilon_min,
                                           exploration_steps, device, logger)
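
For context on the share_memory() calls in AsynDQNValueFunction above: torch.multiprocessing lets several worker processes update one set of network parameters in place, Hogwild-style, which is the mechanism the asynchronous agent relies on. A minimal sketch of that pattern, independent of this repository (SmallNet and worker are illustrative names only):

import torch
import torch.nn as nn
import torch.multiprocessing as mp


class SmallNet(nn.Module):
    # tiny stand-in network, not part of the repository
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)


def worker(shared_net: nn.Module, steps: int):
    # each process builds its own optimizer over the *shared* parameters
    opt = torch.optim.SGD(shared_net.parameters(), lr=1e-2)
    for _ in range(steps):
        x = torch.randn(8, 4)
        loss = shared_net(x).pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()  # the update lands in shared memory, visible to every worker


if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)
    net = SmallNet()
    net.share_memory()  # the same call the value function above makes on its networks
    procs = [mp.Process(target=worker, args=(net, 100)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

The updates are lock-free, so individual gradient steps can overwrite one another; that is the usual trade-off accepted by asynchronous DQN/A3C-style training.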
@@ -0,0 +1,147 @@
import argparse

import torch.multiprocessing as mp

from agents.async_dqn_agent import *
from agents.dqn_agent import *
from abc_rl.experience_replay import *
from abc_rl.exploration import *
from environments.env_wrapper import EnvWrapper
from exploration.epsilon_greedy import *
from tools.dqn_play_ground import DQNPlayGround
from utils.hyperparameters import *
from utils.hyperparameters import Hyperparameters


# Argument parser for command line arguments
parser = argparse.ArgumentParser(description='PyTorch dqn training arguments')
parser.add_argument('--env_name', default='ALE/Pong-v5', type=str,
                    help='openai gym environment (default: ALE/Pong-v5)')
parser.add_argument('--worker_num', default=4, type=int,
                    help='parallel worker number (default: 4)')
parser.add_argument('--device', default='cuda:0', type=str,
                    help='calculation device (default: cuda:0)')
parser.add_argument('--log_path', default='../exps/async_dqn/', type=str,
                    help='log save path (default: ../exps/async_dqn/)')

# Load hyperparameters from the yaml file (combined with the command line arguments above)
cfg = Hyperparameters(parser, '../configs/async_dqn.yaml')


def test(agent, test_episode_num: int):
    """
    Test the DQN agent for a given number of episodes.
    :param agent: The agent to be evaluated
    :param test_episode_num: The number of episodes for testing
    :return: The average reward and average steps per episode
    """
    env = EnvWrapper(cfg['env_name'], repeat_action_probability=0, frameskip=cfg['skip_k_frame'])
    exploration_method = EpsilonGreedy(cfg['epsilon_for_test'])
    reward_cum = 0
    step_cum = 0
    for i in range(test_episode_num):
        state, _ = env.reset()
        done = truncated = False
        step_i = 0
        while (not done) and (not truncated):
            obs = agent.perception_mapping(state, step_i)
            action = agent.select_action(obs, exploration_method)
            next_state, reward, done, truncated, inf = env.step(action)
            reward_cum += reward
            state = next_state
            step_i += 1
        step_cum += step_i
    # average over the episodes that were actually played
    return reward_cum / test_episode_num, step_cum / test_episode_num


def train(rank: int, agent: DQNAgent, env: EnvWrapper,
          training_steps_each_worker: int,
          no_op: int, batch_per_epoch: int):
    # training loop executed by each worker process
    training_steps = 0
    episode = 0
    epoch_i = 0
    run_test = False
    while training_steps < training_steps_each_worker:
        state, _ = env.reset()
        done = False
        truncated = False
        step_i = 0
        reward_cumulated = 0
        obs = agent.perception_mapping(state, step_i)
        while (not done) and (not truncated):
            # random actions for the first `no_op` steps of each episode,
            # then the agent's own exploration policy
            if step_i >= no_op:
                action = agent.select_action(obs)
            else:
                action = agent.select_action(obs, RandomAction())
            next_state, reward_raw, done, truncated, inf = env.step(action)
            reward = agent.reward_shaping(reward_raw)
            next_obs = agent.perception_mapping(next_state, step_i)
            agent.store(obs, action, reward, next_obs, done, truncated)
            agent.train_step()
            # keep the per-worker replay memory small by clearing it periodically
            if len(agent.memory) > 1000:
                agent.memory.clear()
            obs = next_obs
            reward_cumulated += reward
            training_steps += 1
            step_i += 1
            # only worker 0 schedules the periodic evaluation
            if rank == 0 and training_steps % batch_per_epoch == 0:
                run_test = True
                epoch_i += 1
        if rank == 0:
            agent.logger.msg(f'{training_steps} training reward: ' + str(reward_cumulated))
            agent.logger.tb_scalar('training reward', reward_cumulated, training_steps)
        if run_test:
            agent.logger.msg(f'{epoch_i} test start:')
            avg_reward, avg_steps = test(agent, cfg['agent_test_episodes'])
            agent.logger.tb_scalar('avg_reward', avg_reward, epoch_i)
            agent.logger.tb_scalar('avg_steps', avg_steps, epoch_i)
            agent.logger.tb_scalar('epsilon', agent.exploration_method.epsilon, epoch_i)
            agent.logger.msg(f'{epoch_i} avg_reward: ' + str(avg_reward))
            agent.logger.msg(f'{epoch_i} avg_steps: ' + str(avg_steps))
            agent.logger.msg(f'{epoch_i} epsilon: ' + str(agent.exploration_method.epsilon))
            run_test = False  # run the evaluation once per epoch

        episode += 1


class AsyncDQNPlayGround:
    def __init__(self, agent: AsynDQNAgent, env: list, cfg: Hyperparameters):
        self.agent = agent
        self.env_list = env
        self.cfg = cfg
        self.worker_num = cfg['worker_num']
        # split the total step budget evenly across the workers
        self.training_steps_each_worker = int(self.cfg['training_steps'] / self.worker_num)

    def train(self):
        # 'spawn' is required when CUDA tensors are shared across processes
        mp.set_start_method('spawn', force=True)
        processes = []
        for rank in range(self.worker_num):
            p = mp.Process(target=train, args=(rank, self.agent, self.env_list[rank],
                                               self.training_steps_each_worker,
                                               self.cfg['no_op'],
                                               self.cfg['batch_num_per_epoch'] // self.worker_num))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()


def main():
    logger = Logger(cfg['env_name'], cfg['log_path'])
    logger.msg('\nparameters:' + str(cfg))
    envs = [EnvWrapper(cfg['env_name'], repeat_action_probability=0,
                       frameskip=cfg['skip_k_frame'])
            for _ in range(cfg['worker_num'])]
    async_dqn_agent = AsynDQNAgent(cfg['input_frame_width'], cfg['input_frame_height'], envs[0].action_space,
                                   cfg['mini_batch_size'], cfg['replay_buffer_size'], cfg['replay_start_size'],
                                   cfg['learning_rate'], cfg['step_c'], cfg['agent_saving_period'], cfg['gamma'],
                                   cfg['training_steps'], cfg['phi_channel'], cfg['epsilon_max'], cfg['epsilon_min'],
                                   cfg['exploration_steps'], cfg['device'], logger)
    dqn_pg = AsyncDQNPlayGround(async_dqn_agent, envs, cfg)
    dqn_pg.train()


if __name__ == '__main__':
    main()
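
The cfg object above is built from both the argparse parser and ../configs/async_dqn.yaml, so command-line values such as --worker_num sit alongside the YAML entries shown below. The repository's Hyperparameters class is not part of this diff, so the following is only a rough sketch, under the assumption that it behaves like a dict-style merge of parsed arguments and YAML contents (all names here are illustrative, not the real implementation):

import argparse
import yaml  # PyYAML


class HyperparametersSketch:
    # hypothetical stand-in for utils.hyperparameters.Hyperparameters
    def __init__(self, parser: argparse.ArgumentParser, yaml_path: str):
        self._values = vars(parser.parse_args())          # command-line arguments first
        with open(yaml_path) as f:
            self._values.update(yaml.safe_load(f) or {})  # YAML entries added on top

    def __getitem__(self, key):
        return self._values[key]

    def __str__(self):
        return str(self._values)

Whether the real class lets the YAML or the command line win on conflicting keys is not visible in this commit; the sketch only shows the kind of merge the script depends on.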
@@ -0,0 +1,22 @@ | ||
mini_batch_size: 32 | ||
batch_num_per_epoch: 1_000 | ||
replay_buffer_size: 1000 | ||
training_steps: 50_000_000 | ||
skip_k_frame: 4 | ||
phi_channel: 4 | ||
device: 'cuda: 0' | ||
input_frame_width: 84 | ||
input_frame_height: 84 | ||
replay_start_size: 100 | ||
gamma: 0.99 | ||
no_op: 30 | ||
save_path: './exps/' | ||
log_path: '../exps/dqn/' | ||
learning_rate: 0.00001 | ||
step_c: 10_000 | ||
epsilon_max: 1. | ||
epsilon_min: 0.1 | ||
exploration_steps: 1_000_000 | ||
epsilon_for_test: 0.05 | ||
agent_test_episodes: 20 | ||
agent_saving_period: 80000 |
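
For a sense of scale, combining these values with the script's defaults (worker_num defaults to 4 on the command line): each worker receives training_steps / worker_num environment steps, and worker 0 triggers an evaluation every batch_num_per_epoch / worker_num of its own steps. A quick arithmetic check:

training_steps = 50_000_000
batch_num_per_epoch = 1_000
worker_num = 4  # argparse default in the training script

steps_per_worker = training_steps // worker_num   # 12_500_000 steps for each worker
test_every = batch_num_per_epoch // worker_num    # worker 0 evaluates every 250 of its steps
total_epochs = steps_per_worker // test_every     # 50_000 evaluation epochs over a full run
print(steps_per_worker, test_every, total_epochs)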