-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCustomGridWorldV0.m
211 lines (186 loc) · 8.2 KB
/
CustomGridWorldV0.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
classdef CustomGridWorldV0 < matlab.System
% GridWorld represents a 12x12 deterministic grid world with 3 robots.
% Copyright 2020-2022 The MathWorks, Inc.
% Public, tunable properties
% properties
% % Initial position of robots
% InitialStates (1,2) double
%
%
% end
properties(Dependent)
% Initial position of robots
InitialStates (1,2) double
% CurrentState
% String with name of current state
% CurrentState string
% States
% String vector of the state names
States string
% Actions
% String vector with the action names
Actions string
% ObstacleStates
% String vector with the state names which can not be reached in
% the grid world.
ObstacleStates string
% TerminalStates
% String vector with the state names which are terminal in the grid
% world.
TerminalStates string
% add terminal
end
% Public, non-tunable properties
properties (Nontunable)
% Obstacle matrix
Obstacles double = -1
% Max step count
MaxStepCount (1,1) double = 500
%%%&&&&&&&&&&&&&&&&&&&
% TerminalStates (1,2) double % add terminal
%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
properties(DiscreteState)
% Discretized XY space with cells containing 0.5 or 1.0
% 0: unexplored
% 0.25: explored by robot A
% 0.50: explored by robot B
% 0.75: explored by robot B
% 1.0: obstacle
Grid
% States of robot A,B: [rowA colA; rowB colB]
% States
% Step count
StepCount
% Individual cell exploration count
% NumExploredCells
end
% Pre-computed constants
properties(Access = private)
% Handle to figure
% Figure
% Grid size [numrows numcols]
Size (1,2) double
end
methods
% Constructor
function this = CustomGridWorldV0(varargin)
% Support name-value pair arguments when constructing object
setProperties(this,nargin,varargin{:})
end
end
methods(Access = protected)
%% Common functions
function setupImpl(obj) %#ok<MANU>
% Perform one-time calculations, such as computing constants
end
function [observations,rewards,isdone] = stepImpl(obj,actions)
numRobots = 1;
% Rewards are:
% Agent moves to Terminal State: 10
% Agent tries to move out of grid: -10
% Agent collides with another agent: -10
% Agent collides with obstacle: -10
% Movement penalty: -1
% Lazy penalty: -2
% On full coverage: +4000 * coverage contribution
% move robots to their next state
%next_states = zeros(numRobots,2);
rewards = zeros(numRobots,1);
isdone = 0;
next_states = obj.States;
for idx = 1:numRobots
state = obj.States(idx,:);
action = actions(idx);
switch(action)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% case 0
% % Wait
% next_states(idx,:) = state;
% rewards(idx) = rewards(idx) - 10; % lazy penalty
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
case 1
% Move up North
if state(1) < obj.Size(1) && ~checkCollision(obj,state+[1 0],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [1 0];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go up
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 2
% Move down South
if state(1) > 1 && ~checkCollision(obj,state+[-1 0],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [-1 0];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go down
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 4
% Move left WEst
if state(2) > 1 && ~checkCollision(obj,state+[0 -1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [0 -1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go left
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 3
% Move right East
if state(2) < obj.Size(2) && ~checkCollision(obj,state+[0 1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [0 1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go right
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 5
% Move right-up NorthEast
if state(2) < obj.Size(2) && ~checkCollision(obj,state+[1 1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [1 1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go right
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 6
% Move Left up NorthWest
if state(2) < obj.Size(2) && ~checkCollision(obj,state+[1 -1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [1 -1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go right
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 7
% Move Right Down South East
if state(2) < obj.Size(2) && ~checkCollision(obj,state+[-1 1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [-1 1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go right
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
case 8
% Move Left Down South West
if state(2) < obj.Size(2) && ~checkCollision(obj,state+[-1 -1],next_states(idx~=1:numRobots,:))
next_states(idx,:) = state + [-1 -1];
rewards(idx) = rewards(idx) - 1;
else
% add a negative reward for trying to go right
next_states(idx,:) = state;
rewards(idx) = rewards(idx) - 10;
end
end
end
end
end
end