-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmall_random.py
168 lines (138 loc) · 5.12 KB
/
small_random.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
Author: Amy Zhang
Class: CS238 Decision Making Under Uncertainty
Date: 11/7/2020
Description: Simulates a simplified version of Plants versus Zombies in a small 3x3 world with a random planting
policy. The home is on the left side of the grid and zombies enter on the right side of the grid. If a zombie
enters the home, then it eats your brains and you lose :( If you survive the night (20 time steps) and no zombies
remain on the board, then you win! Traditionally, the player plants defensive plants that have the ability to kill
the zombies in that row. Planting and zombie behavior are modeled stochastically here to simulate a random
planting policy.
"""
import pandas as pd
import numpy as np
import copy
def containsZombie(state):
    """Return True when any zombie code is present in ``state``.

    Zombie codes are 10 (strong), 11 (strong but already weakened) and
    20 (weak). ``state`` may be the whole board or any slice of it.
    """
    return any(code in state for code in (10, 11, 20))
def loosingState(state):
    """Return True when a zombie occupies the home column.

    The home column is the left-most cell of each row, i.e. indices
    0, 3 and 6 of the flat board list.
    """
    home_column = tuple(state[cell] for cell in (0, 3, 6))
    return containsZombie(home_column)
def moveZombies(state):
    """Advance every zombie one cell to the left, toward the home column.

    The board is a flat list of 9 cells laid out row by row (3 rows of 3
    columns) and is modified in place. A zombie that steps onto a plant
    (code 3) eats it and takes over the cell.

    Zombies already standing in the home column (indices 0, 3, 6) stay where
    they are: there is no cell to their left, and moving them to
    ``index - 1`` would wrap them into another row (or to the end of the
    list for index 0). A zombie directly behind such a stationary zombie is
    blocked rather than overwriting it.

    Args:
        state: mutable sequence of 9 cell codes.

    Returns:
        The number of plants eaten during this move.
    """
    zombie_codes = (10, 11, 20)
    eaten = 0
    # Snapshot the positions before moving anything. Ascending order moves the
    # leading zombie of a row first, so a follower steps into a vacated cell.
    positions = [pos for pos, cell in enumerate(state) if cell in zombie_codes]
    for pos in positions:
        if pos % 3 == 0:
            # Already in the home column: nowhere further left to go.
            continue
        target = pos - 1
        if state[target] in zombie_codes:
            # Cell ahead is held by a zombie that could not move; stay put.
            continue
        if state[target] == 3:
            # zombie ran into plant and ate it
            eaten += 1
        state[target] = state[pos]
        state[pos] = 0
    return eaten
def addZombie(state):
    """Drop a new zombie onto a random available landing cell.

    Landing cells are the right-most cell of each row (indices 2, 5, 8).
    A landing cell is available when it is empty (0) or holds a plant (3);
    landing on a plant eats it. The new zombie is strong (10) or weak (20)
    with equal probability. ``state`` is modified in place.

    Returns:
        1 if the new zombie landed on a plant and ate it, otherwise 0
        (including when no landing cell was available).
    """
    landing_cells = (state[2], state[5], state[8])
    open_rows = [row for row, cell in enumerate(landing_cells) if cell in (0, 3)]
    if not open_rows:
        return 0

    row = np.random.choice(open_rows)
    target = 2 + 3 * row
    # zombie ran into plant and ate it
    plants_eaten = 1 if state[target] == 3 else 0
    # add strong zombie with prob 0.5 and weak zombie with prob 0.5
    state[target] = np.random.choice([10, 20], p=[0.5, 0.5])
    return plants_eaten
def killZombies(state):
    """Let the plants in each row shoot at that row's leading zombie.

    For every row that contains both a plant (3) and a zombie, the left-most
    plant fires at the left-most zombie, provided the zombie is to the
    plant's right. A weak zombie (20) or an already weakened zombie (11) is
    removed from the board; a strong zombie (10) is downgraded to 11.
    ``state`` is modified in place.

    Returns:
        The number of zombies removed from the board.
    """
    zombie_codes = (10, 11, 20)
    kills = 0
    for row_start in (0, 3, 6):
        lane = state[row_start:row_start + 3]
        zombie_cols = [col for col, cell in enumerate(lane) if cell in zombie_codes]
        plant_cols = [col for col, cell in enumerate(lane) if cell == 3]
        if not zombie_cols or not plant_cols:
            continue

        target = zombie_cols[0]
        # if plant is facing zombie, then valid shot
        if target <= plant_cols[0]:
            continue

        victim = lane[target]
        if victim == 10:
            # strong zombie is weakened
            state[row_start + target] = 11
        elif victim in (11, 20):
            # zombie is killed!
            state[row_start + target] = 0
            kills += 1
    return kills
def simulateGame():
    """Play one game under the random planting policy and log each step.

    Each loop iteration records one transition (s, a, r, sp): the board
    before the step, the chosen action (the planted cell index, or 9 for
    "no plant"), the reward, and the board after the step. Reward is
    zombies killed minus plants eaten, with -200 added when a zombie reaches
    the home column and +100 added when the night is over (step >= 20) and
    no zombies remain.

    Returns:
        A tuple ``(game, win)`` where ``game`` is a DataFrame with columns
        's', 'a', 'r', 'sp' indexed by step, and ``win`` is 1 if the humans
        won and 0 otherwise.
    """
    history = pd.DataFrame(columns=['s', 'a', 'r', 'sp'])
    board = [0] * 9
    # starting state always has one zombie
    addZombie(board)
    won = 0
    step = 0
    while True:
        # generates (state, action, reward, sp); start with a snapshot of s
        record = [copy.copy(board)]

        # ACTION
        empty_cells = [pos for pos, cell in enumerate(board) if cell == 0]
        if not empty_cells:
            # if board is full, no choice but to wait
            action = 9
        else:
            spot = np.random.choice(empty_cells)
            if step == 0:
                # give the humans a chance: always plant at the first timestep
                board[spot] = 3
            else:
                # plant with prob 0.5 at each following timestep
                board[spot] = np.random.choice([3, 0], p=[0.5, 0.5])
            # action 9 corresponds to not planting aka placing 0
            action = spot if board[spot] == 3 else 9
        record.append(action)

        # NEXT STATE
        killed = killZombies(board)
        eaten = 0
        # zombies are clumsy, they successfully step forward with prob 0.5
        if np.random.random() < 0.5:
            eaten += moveZombies(board)
        # next state has new zombie with prob 0.7, zombies can only enter
        # during night (the random draw happens regardless of the step)
        if np.random.random() < 0.7 and step < 20:
            eaten += addZombie(board)

        # REWARD
        reward = killed - eaten
        zombies_won = loosingState(board)
        humans_won = (
            not zombies_won and step >= 20 and not containsZombie(board)
        )
        if zombies_won:
            # yikes the zombies win
            reward += -200
        elif humans_won:
            # survived the night and killed remaining zombies, humans win!
            reward += 100
            won = 1

        record.append(reward)
        record.append(copy.copy(board))
        history.loc[step] = record

        if zombies_won or humans_won:
            break
        step += 1
    return history, won
def generateData(num_games=100):
    """Simulate several games and stack their transitions into one DataFrame.

    The per-game frames are collected in a list and combined with a single
    ``pd.concat`` call. ``DataFrame.append`` was removed in pandas 2.0 and
    copied the whole accumulated frame on every call. As before, each game's
    rows keep their own step index, so index values repeat across games.

    The total number of games won is printed to stdout.

    Args:
        num_games: how many games to simulate (defaults to 100).

    Returns:
        A DataFrame with columns 's', 'a', 'r', 'sp' holding every recorded
        transition; empty (with those columns) when ``num_games`` is 0.
    """
    games = []
    win_count = 0
    for _ in range(num_games):
        game, win = simulateGame()
        games.append(game)
        win_count += win
    print(win_count)
    if not games:
        # pd.concat raises on an empty list; return an empty, typed frame.
        return pd.DataFrame(columns=['s', 'a', 'r', 'sp'])
    return pd.concat(games)
def main():
    """Generate a batch of simulated games."""
    dataset = generateData()
    # To persist the transitions, uncomment the line below:
    # dataset.to_csv("game_data1.csv", index=False)


if __name__ == "__main__":
    main()