-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathP_data_stoch.m
80 lines (80 loc) · 1.94 KB
/
P_data_stoch.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
% P_data_stoch: build a random, sparse transition tensor for an MDP.
% For every (state, action) pair, n_goto_s distinct successor states are
% picked at random and assigned random probabilities that sum to 1.
% Result: P(s, a, s2) holds the probability of moving from s to s2 under a.
clear all;
close all;
clc;

% n = 10;       % only referenced by the commented-out Grid World section
n_S = 80;       % first and third dimension of P (states)
n_A = 8;        % second dimension of P (actions)
n_goto_s = 5;   % non-zero successor entries per (state, action) pair

P = zeros(n_S, n_A, n_S);   % Transition Probabilities, all zero to start

for s = 1:n_S
    for a = 1:n_A
        % Draw the successor set first, then its weights, so the random
        % number stream is consumed in the same order on every pass.
        successors = randperm(n_S, n_goto_s);
        weights = rand(n_goto_s, 1);
        weights = weights / sum(weights);   % normalise to a distribution

        % Fill all successor entries of this (s, a) pair in one assignment.
        P(s, a, successors) = reshape(weights, 1, 1, []);
    end
end
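%% Grid World (disabled)
% Alternative, hand-built transition model kept for reference only: every
% line of this section is commented out. It relies on the grid side length
% n, whose assignment (n = 10) is also commented out near the top of the
% script, so n must be restored before this section is re-enabled.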
% i=n+2;
% prob = 1;
% k = 5;
% r_prob = [k*prob, prob];
% while(i<(n-1)*n)
% for a=1:n_A
% r = repmat(r_prob(2),n_A,1);
% r(a) = r_prob(1);
% r=r/sum(r);
% neigh_s = [-1,-n,1,n];
% for c_s=1:length(neigh_s)
% P(i,a,i+neigh_s(c_s)) = r(c_s);
% end
% end
% if(mod(i,n)==n-1)
% i=i+3;
% else
% i=i+1;
% end
% end
% i=1;
% while(i<=n_S)
% if(mod(i,n)==1 && i+n<=n_S && i-n>0)
% neigh_s = [-n,1,n];
% neigh_a = [2,3,4];
% elseif(mod(i,n)==0 && i+n<=n_S && i-n>0)
% neigh_s = [-1,-n,n];
% neigh_a = [1,2,4];
% elseif(mod(i,n)>1 && i-n<0)
% neigh_s = [-1,1,n];
% neigh_a = [1,3,4];
% elseif(mod(i,n)>1 && i+n>n_S)
% neigh_s = [-1,-n,1];
% neigh_a = [1,2,3];
% elseif(i==1)
% neigh_s = [1,n];
% neigh_a = [3,4];
% elseif(i==n)
% neigh_s = [-1,n];
% neigh_a = [1,4];
% elseif(i==n*(n-1)+1)
% neigh_s = [-n,1];
% neigh_a = [2,3];
% elseif(i==n^2)
% neigh_s = [-1,-n];
% neigh_a = [1,2];
% end
% for a=1:length(neigh_a)
% r = repmat(r_prob(2),length(neigh_a),1);
% r(a) = r_prob(1);
% r=r/sum(r);
% for c_s=1:length(neigh_s)
% P(i,a,i+neigh_s(c_s)) = r(c_s);
% end
% end
% if(mod(i,n)==1 && i~=1 && i~=(n-1)*n+1)
% i=i+n-1;
% else
% i=i+1;
% end
% end