From 6eea73fd9773c6f8b22caba9f681c5a92c39f220 Mon Sep 17 00:00:00 2001
From: Jeremy Fix
Date: Sat, 29 Sep 2018 09:38:19 +0200
Subject: [PATCH] indent

---
 examples/example-004-001-cliff-onestep.cc | 170 +++++++++++-----------
 1 file changed, 85 insertions(+), 85 deletions(-)

diff --git a/examples/example-004-001-cliff-onestep.cc b/examples/example-004-001-cliff-onestep.cc
index 45a5d00..64d0b84 100644
--- a/examples/example-004-001-cliff-onestep.cc
+++ b/examples/example-004-001-cliff-onestep.cc
@@ -4,7 +4,7 @@
 #include <rl.hpp>
 #include <random>
 
-	
+
 
 #define NB_EPISODES 3000
 
@@ -30,90 +30,90 @@ int main(int argc, char* argv[]) {
     std::random_device rd;
     std::mt19937 gen(rd());
 
-  // 1) Instantiate the simulator
-  Param param;
-  Simulator simulator(param);
-
-  // 2) Instantiate the ActorCritic
-  auto action_begin = rl::enumerator(rl::problem::cliff_walking::Action::actionNorth);
-  auto action_end = action_begin + rl::problem::cliff_walking::actionSize;
-  unsigned int nb_features = Cliff::size;
-  Architecture archi(nb_features,
-		     [](const S& s) { return s;},
-		     action_begin, action_end, gen);
-
-  // 3) Instantiate the learner
-  Learner learner(archi, paramGAMMA, paramALPHA_V, paramALPHA_P);
-
-  // 4) run NB_EPISODES episodes
-  unsigned int episode;
-  unsigned int step;
-  A action;
-  S state, next;
-  double rew;
-
-  std::cout << "Learning " << std::endl;
-  for(episode = 0 ;episode < NB_EPISODES; ++episode) {
-    simulator.restart();
-    learner.restart();
-    step = 0;
-    std::cout << '\r' << "Episode " << episode << std::flush;
-
-    state = simulator.sense();
-
-    while(true) {
-      action = archi.sample_action(state);
-      try {
-	// The following may raise a Terminal state exception
-	simulator.timeStep(action);
-
-	rew = simulator.reward();
-	next = simulator.sense();
-
-	learner.learn(state, action, rew, next);
-
-	state = next;
-	++step;
-      }
-      catch(rl::exception::Terminal& e) {
-	learner.learn(state, action, simulator.reward());
-	break;
-      }
-    }
-  }
-  std::cout << std::endl;
-
-  std::cout << "Testing the learned policy" << std::endl;
-  // After this training phase, we test the policy
-  unsigned int nb_test_episodes = 1000;
-  double cum_length = 0.0;
-  for(unsigned int i = 0 ; i < nb_test_episodes; ++i) {
-    simulator.restart();
-    step = 0;
-    state = simulator.sense();
-    while(true) {
-      action = archi.sample_action(state);
-      try {
-	// The following may raise a Terminal state exception
-	simulator.timeStep(action);
-	state = simulator.sense();
-	++step;
-      }
-      catch(rl::exception::Terminal& e) {
-	break;
-      }
-    }
-    cum_length += step;
-  }
-  std::cout << "The mean length of "<< nb_test_episodes
-	    <<" testing episodes is " << cum_length / double(nb_test_episodes) << std::endl;
-
-  // And let us display the action probabilities for the first state :
-  std::cout << "The probabilities of the actions of the learned controller, in the start state are :" << std::endl;
-  auto proba = archi.get_action_probabilities(0);
-  std::cout << "P(North/s=start) = " << proba[rl::problem::cliff_walking::Action::actionNorth] << std::endl;
-  std::cout << "P(East/s=start) = " << proba[rl::problem::cliff_walking::Action::actionEast] << std::endl;
-  std::cout << "P(South/s=start) = " << proba[rl::problem::cliff_walking::Action::actionSouth] << std::endl;
-  std::cout << "P(West/s=start) = " << proba[rl::problem::cliff_walking::Action::actionWest] << std::endl;
+    // 1) Instantiate the simulator
+    Param param;
+    Simulator simulator(param);
+
+    // 2) Instantiate the ActorCritic
+    auto action_begin = rl::enumerator(rl::problem::cliff_walking::Action::actionNorth);
+    auto action_end = action_begin + rl::problem::cliff_walking::actionSize;
+    unsigned int nb_features = Cliff::size;
+    Architecture archi(nb_features,
+                       [](const S& s) { return s;},
+                       action_begin, action_end, gen);
+
+    // 3) Instantiate the learner
+    Learner learner(archi, paramGAMMA, paramALPHA_V, paramALPHA_P);
+
+    // 4) run NB_EPISODES episodes
+    unsigned int episode;
+    unsigned int step;
+    A action;
+    S state, next;
+    double rew;
+
+    std::cout << "Learning " << std::endl;
+    for(episode = 0 ;episode < NB_EPISODES; ++episode) {
+        simulator.restart();
+        learner.restart();
+        step = 0;
+        std::cout << '\r' << "Episode " << episode << std::flush;
+
+        state = simulator.sense();
+
+        while(true) {
+            action = archi.sample_action(state);
+            try {
+                // The following may raise a Terminal state exception
+                simulator.timeStep(action);
+
+                rew = simulator.reward();
+                next = simulator.sense();
+
+                learner.learn(state, action, rew, next);
+
+                state = next;
+                ++step;
+            }
+            catch(rl::exception::Terminal& e) {
+                learner.learn(state, action, simulator.reward());
+                break;
+            }
+        }
+    }
+    std::cout << std::endl;
+
+    std::cout << "Testing the learned policy" << std::endl;
+    // After this training phase, we test the policy
+    unsigned int nb_test_episodes = 1000;
+    double cum_length = 0.0;
+    for(unsigned int i = 0 ; i < nb_test_episodes; ++i) {
+        simulator.restart();
+        step = 0;
+        state = simulator.sense();
+        while(true) {
+            action = archi.sample_action(state);
+            try {
+                // The following may raise a Terminal state exception
+                simulator.timeStep(action);
+                state = simulator.sense();
+                ++step;
+            }
+            catch(rl::exception::Terminal& e) {
+                break;
+            }
+        }
+        cum_length += step;
+    }
+    std::cout << "The mean length of "<< nb_test_episodes
+              <<" testing episodes is " << cum_length / double(nb_test_episodes) << std::endl;
+
+    // And let us display the action probabilities for the first state :
+    std::cout << "The probabilities of the actions of the learned controller, in the start state are :" << std::endl;
+    auto proba = archi.get_action_probabilities(0);
+    std::cout << "P(North/s=start) = " << proba[rl::problem::cliff_walking::Action::actionNorth] << std::endl;
+    std::cout << "P(East/s=start) = " << proba[rl::problem::cliff_walking::Action::actionEast] << std::endl;
+    std::cout << "P(South/s=start) = " << proba[rl::problem::cliff_walking::Action::actionSouth] << std::endl;
+    std::cout << "P(West/s=start) = " << proba[rl::problem::cliff_walking::Action::actionWest] << std::endl;
 
 }
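
Note for readers of this example (an editorial sketch, not part of the patch): the reindented code is a one-step actor-critic agent on the cliff-walking problem, and the patch changes whitespace only. The sketch below illustrates the textbook update that learner.learn(state, action, rew, next) is presumed to perform here: compute the TD error delta = r + gamma * V(s') - V(s), move the critic along delta, and move a softmax actor along delta * grad log pi(a|s). On the terminal transition caught via rl::exception::Terminal, the example calls the two-argument learn(state, action, reward), which amounts to the same update with V(s') = 0. All names below (TabularActorCritic, its members, the constants in main) are hypothetical illustrations, not RLlib's actual types or API.

    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Hypothetical tabular one-step actor-critic, for illustration only.
    struct TabularActorCritic {
        std::size_t nb_states, nb_actions;
        double gamma, alpha_v, alpha_p;
        std::vector<double> v;     // critic: one value per state
        std::vector<double> pref;  // actor: one preference per (state, action) pair

        TabularActorCritic(std::size_t s, std::size_t a, double g, double av, double ap)
            : nb_states(s), nb_actions(a), gamma(g), alpha_v(av), alpha_p(ap),
              v(s, 0.0), pref(s * a, 0.0) {}

        // Softmax policy pi(.|s) over the action preferences.
        std::vector<double> probabilities(std::size_t s) const {
            std::vector<double> p(nb_actions);
            double norm = 0.0;
            for(std::size_t a = 0; a < nb_actions; ++a) {
                p[a] = std::exp(pref[s * nb_actions + a]);
                norm += p[a];
            }
            for(auto& x : p) x /= norm;
            return p;
        }

        // One transition; pass v_next = 0.0 when the step led to a terminal state.
        void learn(std::size_t s, std::size_t a, double r, double v_next) {
            double delta = r + gamma * v_next - v[s];  // TD error
            v[s] += alpha_v * delta;                   // critic step toward the TD target
            auto p = probabilities(s);
            for(std::size_t b = 0; b < nb_actions; ++b) {
                // actor step along delta * d/dpref log pi(a|s) for a softmax policy
                double grad = (b == a ? 1.0 : 0.0) - p[b];
                pref[s * nb_actions + b] += alpha_p * delta * grad;
            }
        }
    };

    int main() {
        // A 4 x 12 grid (48 states) and 4 actions, as in the classic cliff-walking
        // task; gamma and the learning rates are made-up values.
        TabularActorCritic ac(4 * 12, 4, 0.95, 0.05, 0.01);
        ac.learn(0, 1, -1.0, ac.v[12]);  // non-terminal step: bootstrap on V(next)
        ac.learn(12, 1, -100.0, 0.0);    // fell off the cliff: terminal, no bootstrap
        std::cout << "P(a=1 | s=0) = " << ac.probabilities(0)[1] << std::endl;
    }

In the sketch, learn(s, a, r, v[next]) plays the role of the example's non-terminal branch and learn(s, a, r, 0.0) the terminal one; RLlib's Learner and Architecture wrap the same idea behind a feature function and an iterator range over the actions.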