
Commit 6eea73f: indent

jeremyfix committed Sep 29, 2018 (1 parent: 71c178a)
Showing 1 changed file with 85 additions and 85 deletions.

examples/example-004-001-cliff-onestep.cc

#include <rl.hpp>
#include <random>


#define NB_EPISODES 3000

// [the beginning of the file, which presumably defines Cliff, Param, Simulator,
//  S, A, Architecture, Learner and the param* constants used below, is
//  collapsed in this diff view]
int main(int argc, char* argv[]) {
std::random_device rd;
std::mt19937 gen(rd());

  // 1) Instantiate the simulator
  Param param;
  Simulator simulator(param);

  // 2) Instantiate the ActorCritic
  auto action_begin = rl::enumerator<A>(rl::problem::cliff_walking::Action::actionNorth);
  auto action_end   = action_begin + rl::problem::cliff_walking::actionSize;
  unsigned int nb_features = Cliff::size;
  Architecture archi(nb_features,
                     [](const S& s) { return s; },
                     action_begin, action_end, gen);
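  // Note: the lambda above is the feature map passed to the architecture. It
  // returns the state unchanged, and nb_features == Cliff::size, which
  // suggests a tabular representation with one feature per grid cell.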

  // 3) Instantiate the learner
  Learner learner(archi, paramGAMMA, paramALPHA_V, paramALPHA_P);
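  // For reference, a one-step actor-critic update of the kind this learner
  // performs is usually written as follows for a transition (s, a, r, s').
  // This is only a sketch of the standard update rule; the actual
  // implementation in the rl library may organize it differently:
  //
  //   delta    = r + paramGAMMA * V(s') - V(s)  // TD error; the V(s') term
  //                                             // is dropped at terminal states
  //   theta_V += paramALPHA_V * delta * grad_{theta_V} V(s)          // critic step
  //   theta_P += paramALPHA_P * delta * grad_{theta_P} log pi(a|s)   // actor step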

  // 4) run NB_EPISODES episodes
  unsigned int episode;
  unsigned int step;
  A action;
  S state, next;
  double rew;

  std::cout << "Learning " << std::endl;
  for(episode = 0; episode < NB_EPISODES; ++episode) {
    simulator.restart();
    learner.restart();
    step = 0;
    std::cout << '\r' << "Episode " << episode << std::flush;

    state = simulator.sense();

    while(true) {
      action = archi.sample_action(state);
      try {
        // The following may raise a Terminal state exception
        simulator.timeStep(action);

        rew  = simulator.reward();
        next = simulator.sense();

        learner.learn(state, action, rew, next);

        state = next;
        ++step;
      }
      catch(rl::exception::Terminal& e) {
        learner.learn(state, action, simulator.reward());
        break;
      }
    }
  }
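  // Note that in the catch branch above, learn() is called without a next
  // state: the terminal transition is presumably valued by its immediate
  // reward alone, with no bootstrapped paramGAMMA * V(next) term.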
  std::cout << std::endl;

  std::cout << "Testing the learned policy" << std::endl;
  // After this training phase, we test the policy
  unsigned int nb_test_episodes = 1000;
  double cum_length = 0.0;
  for(unsigned int i = 0; i < nb_test_episodes; ++i) {
    simulator.restart();
    step = 0;
    state = simulator.sense();
    while(true) {
      action = archi.sample_action(state);
      try {
        // The following may raise a Terminal state exception
        simulator.timeStep(action);
        state = simulator.sense();
        ++step;
      }
      catch(rl::exception::Terminal& e) {
        break;
      }
    }
    cum_length += step;
  }
std::cout << "The mean length of "<< nb_test_episodes
<<" testing episodes is " << cum_length / double(nb_test_episodes) << std::endl;

// And let us display the action probabilities for the first state :
std::cout << "The probabilities of the actions of the learned controller, in the start state are :" << std::endl;
auto proba = archi.get_action_probabilities(0);
std::cout << "P(North/s=start) = " << proba[rl::problem::cliff_walking::Action::actionNorth] << std::endl;
std::cout << "P(East/s=start) = " << proba[rl::problem::cliff_walking::Action::actionEast] << std::endl;
std::cout << "P(South/s=start) = " << proba[rl::problem::cliff_walking::Action::actionSouth] << std::endl;
std::cout << "P(West/s=start) = " << proba[rl::problem::cliff_walking::Action::actionWest] << std::endl;

std::cout << "The mean length of "<< nb_test_episodes
<<" testing episodes is " << cum_length / double(nb_test_episodes) << std::endl;

// And let us display the action probabilities for the first state :
std::cout << "The probabilities of the actions of the learned controller, in the start state are :" << std::endl;
auto proba = archi.get_action_probabilities(0);
std::cout << "P(North/s=start) = " << proba[rl::problem::cliff_walking::Action::actionNorth] << std::endl;
std::cout << "P(East/s=start) = " << proba[rl::problem::cliff_walking::Action::actionEast] << std::endl;
std::cout << "P(South/s=start) = " << proba[rl::problem::cliff_walking::Action::actionSouth] << std::endl;
std::cout << "P(West/s=start) = " << proba[rl::problem::cliff_walking::Action::actionWest] << std::endl;

}
