forked from ntu-adl-ta/ADL19-HW3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironment.py
89 lines (67 loc) · 2.37 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
### NOTICE ###
You DO NOT need to upload this file
"""
import gym
import numpy as np
from atari_wrapper import make_wrap_atari
class Environment(object):
    '''
    Thin wrapper around a gym-style environment.

    Builds the underlying environment from its name, optionally records
    videos, and exposes reset/step helpers that always hand observations
    back as numpy arrays.
    '''

    def __init__(self, env_name, args, atari_wrapper=False, test=False):
        '''
        Create the underlying environment.

        env_name: str
            name of the environment; names containing 'Mario' are built
            with create_mario_env
        args: object
            must provide `do_render` (render before every step when truthy)
            and `video_dir` (wrap the env in a gym Monitor when truthy)
        atari_wrapper: bool
            build the env with make_wrap_atari instead of gym.make
        test: bool
            when True the atari wrapper is asked not to clip rewards
        '''
        if 'Mario' in env_name:
            # Imported only on this branch, presumably so that the Mario
            # dependency is optional for the other environments.
            from mario_env import create_mario_env
            self.env = create_mario_env(env_name)
        elif atari_wrapper:
            # Rewards are clipped while training and left raw when testing.
            clip_rewards = not test
            self.env = make_wrap_atari(env_name, clip_rewards)
        else:
            self.env = gym.make(env_name)

        # The spaces are read before the optional Monitor wrapping below.
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

        self.do_render = args.do_render

        if args.video_dir:
            self.env = gym.wrappers.Monitor(self.env, args.video_dir, force=True)

    def seed(self, seed):
        '''
        Control the randomness of the environment
        '''
        self.env.seed(seed)

    def reset(self):
        '''
        Reset the underlying environment and return the first observation.

        Shapes below are taken from the original assignment notes and are
        not checked here.

        When running dqn:
            observation: np.array
                stack 4 last frames, shape: (84, 84, 4)
        When running pg:
            observation: np.array
                current state of the game, shape: (8)
        '''
        observation = self.env.reset()
        return np.array(observation)

    def step(self, action):
        '''
        Apply `action` to the environment and return
        (observation, reward, done, info).

        Raises ValueError when `action` is not contained in the action
        space. Shapes below are taken from the original assignment notes
        and are not checked here.

        When running dqn:
            observation: np.array
                stack 4 last preprocessed frames, shape: (84, 84, 4)
            reward: int
                wrapper clips the reward to {-1, 0, 1} by its sign
                we don't clip the reward when testing
            done: bool
                whether reach the end of the episode?
        When running pg:
            observation: np.array
                current state of the game, shape: (8)
            reward: int
            done: bool
                whether reach the end of the episode?
        '''
        if not self.env.action_space.contains(action):
            raise ValueError('Invalid action!!')

        # Rendering happens before the action is applied, so the frame
        # shown is the state the action was chosen for.
        if self.do_render:
            self.env.render()

        observation, reward, done, info = self.env.step(action)
        return np.array(observation), reward, done, info

    def get_action_space(self):
        '''Return the action space captured at construction time.'''
        return self.action_space

    def get_observation_space(self):
        '''Return the observation space captured at construction time.'''
        return self.observation_space

    def get_random_action(self):
        '''Return an action sampled from the action space.'''
        return self.action_space.sample()