# coding: utf-8
# In[17]:
import numpy as np
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')
# This code creates a virtual display to draw game images on.
# If you are running locally, just ignore it
import os
if type(os.environ.get("DISPLAY")) is not str or len(os.environ.get("DISPLAY")) == 0:
    get_ipython().system('bash ../xvfb start')
    os.environ['DISPLAY'] = ':1'
# ### OpenAI Gym
#
# We're going to spend the next several weeks learning algorithms that solve decision processes, so we need some interesting decision problems to test our algorithms on.
#
# That's where OpenAI Gym comes into play. It's a Python library that wraps many classical decision problems, including robot control, video games and board games.
#
# So here's how it works:
# In[18]:
import gym
env = gym.make("MountainCar-v0")
env.reset()
plt.imshow(env.render('rgb_array'))
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)
# Note: if you're running this on your local machine, you'll see a window pop up with the image above. Don't close it, just alt-tab away.
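# A quick optional check of the action space (a small sketch, assuming the same `env` created above): for MountainCar-v0 the action space is Discrete(3), where 0 pushes left, 1 does nothing and 2 pushes right.
# In[ ]:
print("number of actions:", env.action_space.n)
print("a few sampled actions:", [env.action_space.sample() for _ in range(5)])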
# ### Gym interface
#
# The three main methods of an environment are
# * __reset()__ - reset environment to initial state, _return first observation_
# * __render()__ - show current environment state (a more colorful version :) )
# * __step(a)__ - commit action __a__ and return (new observation, reward, is done, info)
#  * _new observation_ - an observation right after committing the action __a__
#  * _reward_ - a number representing your reward for committing action __a__
#  * _is done_ - True if the MDP has just finished, False if still in progress
#  * _info_ - some auxiliary stuff about what just happened. Ignore it ~~for now~~.
# In[19]:
obs0 = env.reset()
print("initial observation code:", obs0)
# Note: in MountainCar, observation is just two numbers: car position and velocity
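# A small optional sketch to back up that note (assuming the same `env` as above): the observation space is a Box of shape (2,), and its bounds give the valid ranges for position and velocity.
# In[ ]:
print("lower bounds (position, velocity):", env.observation_space.low)
print("upper bounds (position, velocity):", env.observation_space.high)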
# In[20]:
print("taking action 2 (right)")
new_obs, reward, is_done, _ = env.step(2)
print("new observation code:", new_obs)
print("reward:", reward)
print("is game over?:", is_done)
# Note: as you can see, the car has moved to the right slightly (by around 0.0005)
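# To see the whole interface working together, here is a minimal sketch of a full episode: reset once, then keep calling step() with random actions until the environment reports it is done (MountainCar-v0 is wrapped with a 200-step time limit, so the loop terminates quickly).
# In[ ]:
# Minimal rollout sketch with random actions (illustration only).
obs = env.reset()
total_reward = 0
is_done = False
while not is_done:
    action = env.action_space.sample()        # pick a random action
    obs, reward, is_done, _ = env.step(action)
    total_reward += reward
print("episode finished, total reward:", total_reward)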
# ### Play with it
#
# Below is the code that drives the car to the right.
#
# However, it doesn't reach the flag at the far right due to gravity.
#
# __Your task__ is to fix it. Find a strategy that reaches the flag.
#
# You're not required to build any sophisticated algorithms for now, feel free to hard-code :)
#
# _Hint: your action at each step should depend either on __t__ or on __s__._
# In[24]:
# create env manually to set time limit. Please don't change this.
TIME_LIMIT = 250
env = gym.wrappers.TimeLimit(gym.envs.classic_control.MountainCarEnv(),
                             max_episode_steps=TIME_LIMIT + 1)
s = env.reset()
actions = {'left': 0, 'stop': 1, 'right': 2}
# prepare "display"
get_ipython().magic('matplotlib notebook')
fig = plt.figure()
ax = fig.add_subplot(111)
fig.show()
def policy(s, t):
    # YOUR CODE HERE: a simple hard-coded policy that pushes in the direction
    # of the current velocity, pumping energy into the car on each swing.
    print(s[1])  # debug output: current velocity
    if s[1] > 0:
        return actions['right']
    else:
        return actions['left']
for t in range(TIME_LIMIT):
    s, r, done, _ = env.step(policy(s, t))

    # draw game image on display
    ax.clear()
    ax.imshow(env.render('rgb_array'))
    fig.canvas.draw()

    if done:
        print("Well done!")
        break
else:
    print("Time limit exceeded. Try again.")
# ### Submit to Coursera
# In[ ]:
from submit import submit_interface
submit_interface(policy, <EMAIL>, <TOKEN>)
# In[ ]: