forked from RobotLocomotion/drake
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implements fitted value iteration on a barycentric mesh
- Loading branch information
1 parent
7c29b62
commit e684a6b
Showing
9 changed files
with
470 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#include "drake/systems/controllers/dynamic_programming.h" | ||
|
||
#include <limits> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include "drake/systems/analysis/simulator.h" | ||
|
||
namespace drake { | ||
namespace systems { | ||
namespace controllers { | ||
|
||
// Runs fitted value iteration over the barycentric meshes built from | ||
// @p state_grid and @p input_grid. | ||
// | ||
// For every (input, state) pair of mesh points the simulator is advanced from | ||
// time 0 to @p timestep; the resulting state is expressed as barycentric | ||
// weights on the state mesh and the one-step cost is recorded as | ||
// timestep * cost_function(context). The value-iteration sweep is then | ||
// repeated until the infinity-norm change of the cost-to-go no longer exceeds | ||
// options.convergence_tol. | ||
// | ||
// Returns the greedy policy (a BarycentricMeshSystem over the state mesh) | ||
// paired with the final cost-to-go, one value per state mesh point. | ||
// | ||
// Preconditions are enforced with DRAKE_DEMAND: non-null simulator, a context | ||
// with only continuous state whose size matches state_grid, exactly one input | ||
// port whose size matches input_grid, timestep > 0, discount_factor in (0, 1], | ||
// and every periodic state index in range with a strictly positive grid extent. | ||
// | ||
// NOTE(review): the loop has no iteration cap, so it relies on the caller | ||
// choosing costs and a discount factor for which the iteration converges. | ||
std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd> | ||
FittedValueIteration( | ||
    Simulator<double>* simulator, | ||
    const std::function<double(const Context<double>& context)>& cost_function, | ||
    const math::BarycentricMesh<double>::MeshGrid& state_grid, | ||
    const math::BarycentricMesh<double>::MeshGrid& input_grid, | ||
    const double timestep, const DynamicProgrammingOptions& options) { | ||
  // TODO(russt): handle discrete state. | ||
  DRAKE_DEMAND(simulator != nullptr); | ||
  const auto& system = simulator->get_system(); | ||
  auto& context = simulator->get_mutable_context(); | ||
|
||
  DRAKE_DEMAND(context.has_only_continuous_state()); | ||
  DRAKE_DEMAND(context.get_continuous_state().size() == | ||
               static_cast<int>(state_grid.size())); | ||
|
||
  DRAKE_DEMAND(context.get_num_input_ports() == 1); | ||
  DRAKE_DEMAND(system.get_num_total_inputs() == | ||
               static_cast<int>(input_grid.size())); | ||
|
||
  DRAKE_DEMAND(timestep > 0.); | ||
  DRAKE_DEMAND(options.discount_factor > 0. && options.discount_factor <= 1.); | ||
  if (!options.state_indices_with_periodic_boundary_conditions.empty()) { | ||
    // The indices are stored in an ordered std::set, so bounding the first | ||
    // and the last element bounds all of them. | ||
    DRAKE_DEMAND( | ||
        *options.state_indices_with_periodic_boundary_conditions.begin() >= 0); | ||
    DRAKE_DEMAND( | ||
        *options.state_indices_with_periodic_boundary_conditions.rbegin() < | ||
        context.get_continuous_state().size()); | ||
    // A periodic axis needs a strictly positive extent; otherwise the wrap | ||
    // below would take a remainder by zero and yield NaN. | ||
    for (const int dim : | ||
         options.state_indices_with_periodic_boundary_conditions) { | ||
      DRAKE_DEMAND(!state_grid[dim].empty()); | ||
      DRAKE_DEMAND(*state_grid[dim].rbegin() > *state_grid[dim].begin()); | ||
    } | ||
  } | ||
|
||
  // TODO(russt): check that the system is time-invariant. | ||
|
||
  math::BarycentricMesh<double> state_mesh(state_grid); | ||
  math::BarycentricMesh<double> input_mesh(input_grid); | ||
|
||
  const int kNumStates = state_mesh.get_num_mesh_points(); | ||
  const int kNumInputs = input_mesh.get_num_mesh_points(); | ||
  const int kNumIndices = state_mesh.get_num_interpolants(); | ||
|
||
  // Per input: column s holds the mesh indices (Tind) and barycentric weights | ||
  // (T) of the state reached from mesh point s, and cost[input](s) holds the | ||
  // one-step cost of applying that input at mesh point s. | ||
  std::vector<Eigen::MatrixXi> Tind(kNumInputs); | ||
  std::vector<Eigen::MatrixXd> T(kNumInputs); | ||
  std::vector<Eigen::RowVectorXd> cost(kNumInputs); | ||
|
||
  {  // Build transition matrices. | ||
    std::cout << "Computing transition and cost matrices"; | ||
    auto& sim_state = context.get_mutable_continuous_state_vector(); | ||
|
||
    Eigen::VectorXd input_vec(input_mesh.get_input_size()); | ||
    Eigen::VectorXd state_vec(state_mesh.get_input_size()); | ||
|
||
    Eigen::VectorXi Tind_tmp(kNumIndices); | ||
    Eigen::VectorXd T_tmp(kNumIndices); | ||
|
||
    for (int input = 0; input < kNumInputs; input++) { | ||
      std::cout << "."; | ||
      Tind[input].resize(kNumIndices, kNumStates); | ||
      T[input].resize(kNumIndices, kNumStates); | ||
      cost[input].resize(kNumStates); | ||
|
||
      input_mesh.get_mesh_point(input, &input_vec); | ||
      context.FixInputPort(0, input_vec); | ||
|
||
      for (int state = 0; state < kNumStates; state++) { | ||
        // Restart every rollout from time zero at the current mesh point. | ||
        context.set_time(0.0); | ||
        sim_state.SetFromVector(state_mesh.get_mesh_point(state)); | ||
|
||
        cost[input](state) = timestep * cost_function(context); | ||
|
||
        simulator->StepTo(timestep); | ||
        state_vec = sim_state.CopyToVector(); | ||
|
||
        for (const int dim : | ||
             options.state_indices_with_periodic_boundary_conditions) { | ||
          const double lower = *state_grid[dim].begin(); | ||
          const double upper = *state_grid[dim].rbegin(); | ||
          const double period = upper - lower; | ||
          // std::fmod keeps the sign of its first argument, so a state below | ||
          // `lower` produces a negative remainder; shift it up by one period | ||
          // so that the wrapped value lands inside the grid extent. | ||
          double offset = std::fmod(state_vec[dim] - lower, period); | ||
          if (offset < 0.) { | ||
            offset += period; | ||
          } | ||
          state_vec[dim] = offset + lower; | ||
        } | ||
|
||
        state_mesh.EvalBarycentricWeights(state_vec, &Tind_tmp, &T_tmp); | ||
        Tind[input].col(state) = Tind_tmp; | ||
        T[input].col(state) = T_tmp; | ||
      } | ||
    } | ||
    std::cout << "done." << std::endl; | ||
  } | ||
|
||
  // Perform value iteration loop | ||
  Eigen::RowVectorXd J = Eigen::RowVectorXd::Zero(kNumStates); | ||
  Eigen::RowVectorXd Jnext(kNumStates); | ||
  // Zero-initialized so that a state for which no input yields a finite cost | ||
  // still maps to a valid input mesh index when the policy is assembled. | ||
  Eigen::RowVectorXi Pi = Eigen::RowVectorXi::Zero(kNumStates); | ||
|
||
  double max_diff = std::numeric_limits<double>::infinity(); | ||
  while (max_diff > options.convergence_tol) { | ||
    for (int state = 0; state < kNumStates; state++) { | ||
      Jnext(state) = std::numeric_limits<double>::infinity(); | ||
|
||
      for (int input = 0; input < kNumInputs; input++) { | ||
        // One-step cost plus the discounted, interpolated cost-to-go of the | ||
        // successor state. | ||
        double Jinput = cost[input](state); | ||
        for (int index = 0; index < kNumIndices; index++) { | ||
          Jinput += options.discount_factor * T[input](index, state) * | ||
                    J(Tind[input](index, state)); | ||
        } | ||
        if (Jinput < Jnext(state)) { | ||
          Jnext(state) = Jinput; | ||
          Pi(state) = input; | ||
        } | ||
      } | ||
    } | ||
    max_diff = (J - Jnext).lpNorm<Eigen::Infinity>(); | ||
    J = Jnext; | ||
    // std::cout << "J = " << J << std::endl; | ||
    // std::cout << "Pi = " << Pi << std::endl; | ||
  } | ||
|
||
  // Create the policy. | ||
  Eigen::MatrixXd policy_values(input_mesh.get_input_size(), kNumStates); | ||
  for (int state = 0; state < kNumStates; state++) { | ||
    policy_values.col(state) = input_mesh.get_mesh_point(Pi(state)); | ||
  } | ||
  auto policy = std::make_unique<BarycentricMeshSystem<double>>(state_mesh, | ||
                                                                policy_values); | ||
  return std::make_pair(std::move(policy), J); | ||
} | ||
|
||
} // namespace controllers | ||
} // namespace systems | ||
} // namespace drake |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <set> | ||
#include <utility> | ||
|
||
#include "drake/math/barycentric.h" | ||
#include "drake/systems/analysis/simulator.h" | ||
#include "drake/systems/framework/vector_system.h" | ||
#include "drake/systems/primitives/barycentric_system.h" | ||
|
||
namespace drake { | ||
namespace systems { | ||
namespace controllers { | ||
|
||
/// Consolidates the many possible options to be passed to the dynamic | ||
/// programming algorithms. | ||
struct DynamicProgrammingOptions { | ||
/// Factor multiplying the interpolated cost-to-go of the successor state in | ||
/// each value iteration update. FittedValueIteration demands a value in | ||
/// the interval (0, 1]. | ||
double discount_factor{1.}; | ||
/// Indices of the state dimensions that FittedValueIteration treats as | ||
/// periodic over the extent (smallest to largest value) of the matching | ||
/// state_grid axis. Every index must be non-negative and smaller than the | ||
/// number of continuous states. | ||
std::set<int> state_indices_with_periodic_boundary_conditions; | ||
/// FittedValueIteration keeps sweeping while the infinity-norm change of | ||
/// the cost-to-go between two successive sweeps exceeds this tolerance. | ||
double convergence_tol = 1e-4; | ||
// TODO(russt): Add visualization callback | ||
}; | ||
|
||
/// Implements Fitted Value Iteration on a (triangulated) Barycentric Mesh, as | ||
/// described in | ||
/// http://underactuated.csail.mit.edu/underactuated.html?chapter=dp . | ||
/// It currently requires that the system to be optimized has only continuous | ||
/// state, and the system is assumed to be time invariant. This code makes a | ||
/// discrete-time approximation (using @p timestep) for the value iteration | ||
/// update. | ||
/// | ||
/// @param simulator contains the reference to the System being optimized and to | ||
/// a Context for that system, which may contain non-default Parameters, etc. | ||
/// The @p simulator is run for @p timestep seconds from every point on the mesh | ||
/// in order to approximate the dynamics, so all of the simulation parameters | ||
/// (integrator, etc.) are relevant during that evaluation. The context must | ||
/// have exactly one input port. | ||
/// | ||
/// @param cost_function is the instantaneous cost (referred to as g(x,u) in the | ||
/// notes). The cost-to-go is incremented by g(x,u)*timestep on each step. | ||
/// @param state_grid defines the mesh on the state space used to represent | ||
/// the cost-to-go function and the resulting policy. It must have one axis | ||
/// per continuous state of the system. | ||
/// @param input_grid defines the discrete action space used in the value | ||
/// iteration update. It must have one axis per input of the system. | ||
/// @param timestep a time in seconds used for the discrete-time approximation. | ||
/// Must be strictly positive. | ||
/// @param options optional DynamicProgrammingOptions structure. | ||
/// | ||
/// @return a std::pair containing the resulting policy, implemented as a | ||
/// BarycentricMeshSystem, and the MatrixXd J that defines the expected | ||
/// cost-to-go on a BarycentricMesh using @p state_grid (one value per mesh | ||
/// point). The policy has a single vector input (which is the continuous | ||
/// state of the system passed in through @p simulator) and a single vector | ||
/// output (which is the input of the system passed in through @p simulator). | ||
/// | ||
std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd> | ||
FittedValueIteration( | ||
Simulator<double>* simulator, // has system and context, as well | ||
// as integrator params, etc. | ||
const std::function<double(const Context<double>& context)>& cost_function, | ||
const math::BarycentricMesh<double>::MeshGrid& state_grid, | ||
const math::BarycentricMesh<double>::MeshGrid& input_grid, | ||
const double timestep, | ||
const DynamicProgrammingOptions& options = DynamicProgrammingOptions()); | ||
|
||
// TODO(russt): Handle the specific case where system is control affine and the | ||
// cost function is quadratic positive-definite. (Adds requirements on the | ||
// system and cost function (e.g. autodiff/symbolic), and doesn't need the | ||
// input_grid argument). | ||
|
||
// TODO(russt): Implement more general FittedValueIteration methods as the | ||
// function approximation tools become available. | ||
|
||
} // namespace controllers | ||
} // namespace systems | ||
} // namespace drake |
Oops, something went wrong.