-
Notifications
You must be signed in to change notification settings - Fork 0
/
Environment.h
151 lines (128 loc) · 3.58 KB
/
Environment.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Truncated Poisson PMF generator and Jack's Car Rental environment used for
// expected-return calculation.
#pragma once
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <stdexcept>
#include <vector>
using namespace std;
// Truncated Poisson probability mass function.
//
// Only a contiguous window [Min(), Max()) of outcomes is kept: the window
// opens at the first n whose probability is strictly above 0.01 and closes
// just before the first later n whose probability drops below 0.01.  The
// probability mass that falls outside the window is spread evenly over the
// kept entries, so the stored values always sum to 1.
//
// vals()[k] is the (adjusted) probability of the outcome Min() + k.
class poisson {
public:
    // Builds the truncated PMF for the rate `lambda` (default 3).
    //
    // Throws std::invalid_argument when `lambda` is negative, NaN, larger
    // than 1e6, or so large that no single outcome has probability above
    // the 0.01 cut-off (the window would be empty).
    poisson(double lambda = 3) : _lambda(lambda), _max_n(0), _min_n(0) {
        const double cutoff = 0.01;
        // The cap keeps the int conversion below well defined; rates this
        // large have no outcome above the cut-off anyway.
        const double max_lambda = 1e6;

        // Written as a negated conjunction so that NaN is rejected too.
        if (!(lambda >= 0.0 && lambda <= max_lambda))
            throw std::invalid_argument("poisson: lambda must be in [0, 1e6]");

        // The PMF peaks at floor(lambda).  If even the peak is not above the
        // cut-off the lower-tail scan below would never terminate.
        if (generate(static_cast<int>(lambda)) <= cutoff)
            throw std::invalid_argument("poisson: no outcome has probability above 0.01");

        // Skip the negligible lower tail.
        int n = 0;
        double p = generate(n);
        while (p <= cutoff)
            p = generate(++n);
        _min_n = n;

        // Collect entries until the upper tail becomes negligible.
        double sum = 0.0;
        do {
            pmf.push_back(p);
            sum += p;
            p = generate(++n);
        } while (p >= cutoff);
        _max_n = n;  // one past the last kept outcome

        // Hand the truncated tail mass back in equal shares.
        const double share = (1.0 - sum) / static_cast<double>(pmf.size());
        for (double& value : pmf)
            value += share;
    }

    // Smallest outcome kept in the window (inclusive).
    int Min() const {
        return _min_n;
    }

    // One past the largest outcome kept in the window (exclusive bound).
    int Max() const {
        return _max_n;
    }

    // Adjusted probabilities for the outcomes Min() .. Max() - 1, in order.
    // Returned by reference to avoid copying the table on every call.
    const std::vector<double>& vals() const {
        return pmf;
    }

private:
    double _lambda;
    int _max_n;
    int _min_n;
    std::vector<double> pmf;

    // Exact Poisson probability of the outcome `n` for the rate _lambda.
    // Evaluated in log space (lgamma(n + 1) == log(n!)) so that neither the
    // power nor the factorial can overflow for larger n.
    double generate(int n) const {
        if (_lambda == 0.0)
            return n == 0 ? 1.0 : 0.0;  // log(0) is not usable below
        const double k = static_cast<double>(n);
        return std::exp(k * std::log(_lambda) - _lambda - std::lgamma(k + 1.0));
    }
};
class Environment
{
friend class Agent;
public:
Environment( int max_customer = 20, int max_transition = 5, double discount_factor = 0.9, int penalty = 2, int income = 10) {
vector<double> temp(max_customer + 1, 0.0f);
for (int i = 0; i < max_customer + 1; ++i)
_Values.push_back(temp);
temp.clear();
_nS[0] = max_customer + 1;
_nS[1] = max_customer + 1;
int lambda[4] = { 3,3,4,2 };
a_contract = poisson(lambda[0]);
a_termination = poisson(lambda[1]);
b_contract = poisson(lambda[3]);
b_termination = poisson(lambda[3]);
_max_customer = max_customer;
_penalty = penalty;
_income = income;
_discount_factor = discount_factor;
}
double expected_reward(int state[2], int action)
{
double reward = 0;
int new_state[2] = {max(min(state[0] - action, _max_customer),0),max(min(state[1] + action, _max_customer),0)};
reward += _penalty * abs(action);
for (int a_new = a_contract.Min(); a_new < a_contract.Max(); a_new++)
for (int b_new = b_contract.Min(); b_new < b_contract.Max() ; b_new++)
for (int a_terminated = a_termination.Min(); a_terminated < a_termination.Max(); a_terminated++)
for (int b_terminated = b_termination.Min(); b_terminated < b_termination.Max(); b_terminated++)
{
double prob = a_contract.vals()[a_new] * a_termination.vals()[a_terminated] * b_contract.vals()[b_new] * b_termination.vals()[b_terminated];
int max_contract_a = min(new_state[0], a_new);
int max_contract_b = min(new_state[1], b_new);
double r = (max_contract_a + max_contract_b) * _income;
int next_state[2] = { max(min(new_state[0] - max_contract_a + a_terminated,_max_customer),0),
max(min(new_state[1] - max_contract_b + b_terminated,_max_customer),0) };
reward += prob * (r + _discount_factor * _Values[next_state[0]][next_state[1]]);
}
return reward;
}
double get_v(int i, int j) {
return _Values[i][j];
}
void set_v(int i, int j, double value) {
_Values[i][j] = value;
}
~Environment(){}
private:
const int defaultinit[4] = { 3,3,4,2 };
int _nS[2];
int _nA;
int _max_customer;
double _penalty;
double _income;
double _discount_factor;
poisson a_contract;
poisson a_termination;
poisson b_contract;
poisson b_termination;
vector<vector<double>> _Values;
};