Skip to content

Commit

Permalink
implements fitted value iteration on a barycentric mesh
Browse files Browse the repository at this point in the history
  • Loading branch information
RussTedrake committed Feb 11, 2018
1 parent 7c29b62 commit e684a6b
Show file tree
Hide file tree
Showing 9 changed files with 470 additions and 1 deletion.
16 changes: 16 additions & 0 deletions math/barycentric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ void BarycentricMesh<T>::get_mesh_point(int index,
DRAKE_DEMAND(index == 0); // otherwise the index was out of range.
}

template <typename T>
VectorX<T> BarycentricMesh<T>::get_mesh_point(int index) const {
VectorX<T> point(get_input_size());
get_mesh_point(index, &point);
return point;
}

template <typename T>
void BarycentricMesh<T>::EvalBarycentricWeights(
const Eigen::Ref<const VectorX<T>>& input,
Expand Down Expand Up @@ -153,6 +160,15 @@ void BarycentricMesh<T>::Eval(const Eigen::Ref<const MatrixX<T>>& mesh_values,
}
}

template <typename T>
VectorX<T> BarycentricMesh<T>::Eval(
const Eigen::Ref<const MatrixX<T>>& mesh_values,
const Eigen::Ref<const VectorX<T>>& input) const {
VectorX<T> output(mesh_values.rows());
Eval(mesh_values, input, &output);
return output;
}

template <typename T>
MatrixX<T> BarycentricMesh<T>::MeshValuesFrom(
const std::function<VectorX<T>(const Eigen::Ref<const VectorX<T>>&)>&
Expand Down
9 changes: 9 additions & 0 deletions math/barycentric.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ class BarycentricMesh {
/// @param point is set to the num_inputs-by-1 location of the mesh point.
void get_mesh_point(int index, EigenPtr<Eigen::VectorXd> point) const;

/// Returns the position of a mesh point in the input space referenced by its
/// scalar index to @p point.
/// @param index must be in [0, get_num_mesh_points).
VectorX<T> get_mesh_point(int index) const;

/// Writes the mesh indices used for interpolation to @p mesh_indices, and the
/// interpolating coefficients to @p weights. Inputs that are outside the
/// bounding box of the input_grid are interpolated as though they were
Expand Down Expand Up @@ -108,6 +113,10 @@ class BarycentricMesh {
const Eigen::Ref<const VectorX<T>>& input,
EigenPtr<VectorX<T>> output) const;

/// Returns the function evaluated at @p input.
VectorX<T> Eval(const Eigen::Ref<const MatrixX<T>>& mesh_values,
const Eigen::Ref<const VectorX<T>>& input) const;

/// Evaluates @p vector_func at all input mesh points and extracts the mesh
/// value matrix that should be used to approximate the function with this
/// barycentric interpolation.
Expand Down
2 changes: 1 addition & 1 deletion math/discrete_algebraic_riccati_equation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ void swap_block(Eigen::Ref<Eigen::MatrixXd> S, Eigen::Ref<Eigen::MatrixXd> T,
// stable eigenvalue(s).
// Push the block pointed by q to the position pointed by p.
// Finish when n stable eigenvalues are placed at the top-left n by n matrix.
// The algorithm for swaping blocks is described in the papers
// The algorithm for swapping blocks is described in the papers
// "A generalized eigenvalue approach for solving Riccati equations" by P. Van
// Dooren, 1981, and "Numerical Methods for General and Structured Eigenvalue
// Problems" by Daniel Kressner, 2005.
Expand Down
6 changes: 6 additions & 0 deletions math/test/barycentric_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ GTEST_TEST(BarycentricTest, GetMeshPoints) {
EXPECT_TRUE(CompareMatrices(point, Vector3d{0, 2, 4}));
bary.get_mesh_point(3, &point);
EXPECT_TRUE(CompareMatrices(point, Vector3d{1, 2, 4}));

// Test the alternative call signature.
EXPECT_TRUE(CompareMatrices(bary.get_mesh_point(0), Vector3d{0, 2, 3}));
}

GTEST_TEST(BarycentricTest, EvalWeights) {
Expand Down Expand Up @@ -121,6 +124,9 @@ GTEST_TEST(BarycentricTest, EvalTest) {
mesh(0, 2) = 10.;
bary.Eval(mesh, Vector2d{.25, .75}, &value);
EXPECT_NEAR(value[0], 6.25, 1e-8);

// Test the alternative call signature.
EXPECT_NEAR(bary.Eval(mesh, Vector2d{.25, .75})[0], 6.25, 1e-8);
}

GTEST_TEST(BarycentricTest, MultidimensionalOutput) {
Expand Down
23 changes: 23 additions & 0 deletions systems/controllers/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ drake_cc_library(
],
)

drake_cc_library(
name = "dynamic_programming",
srcs = ["dynamic_programming.cc"],
hdrs = ["dynamic_programming.h"],
deps = [
"//drake/systems/analysis:simulator",
"//drake/systems/framework",
"//drake/systems/primitives:barycentric_system",
],
)

drake_cc_library(
name = "inverse_dynamics",
srcs = ["inverse_dynamics.cc"],
Expand Down Expand Up @@ -223,6 +234,18 @@ drake_cc_library(
],
)

drake_cc_googletest(
name = "dynamic_programming_test",
deps = [
":dynamic_programming",
":linear_quadratic_regulator",
"//drake/common/proto:call_matlab",
"//drake/common/test_utilities:eigen_matrix_compare",
"//drake/systems/primitives:integrator",
"//drake/systems/primitives:linear_system",
],
)

drake_cc_googletest(
name = "polynomial_encode_decode_test",
deps = [
Expand Down
140 changes: 140 additions & 0 deletions systems/controllers/dynamic_programming.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include "drake/systems/controllers/dynamic_programming.h"

#include <limits>
#include <utility>
#include <vector>

#include "drake/systems/analysis/simulator.h"

namespace drake {
namespace systems {
namespace controllers {

std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd>
FittedValueIteration(
Simulator<double>* simulator,
const std::function<double(const Context<double>& context)>& cost_function,
const math::BarycentricMesh<double>::MeshGrid& state_grid,
const math::BarycentricMesh<double>::MeshGrid& input_grid,
const double timestep, const DynamicProgrammingOptions& options) {
// TODO(russt): handle discrete state.
const auto& system = simulator->get_system();
auto& context = simulator->get_mutable_context();

DRAKE_DEMAND(context.has_only_continuous_state());
DRAKE_DEMAND(context.get_continuous_state().size() ==
static_cast<int>(state_grid.size()));

DRAKE_DEMAND(context.get_num_input_ports() == 1);
DRAKE_DEMAND(system.get_num_total_inputs() ==
static_cast<int>(input_grid.size()));

DRAKE_DEMAND(timestep > 0.);
DRAKE_DEMAND(options.discount_factor > 0. && options.discount_factor <= 1.);
if (!options.state_indices_with_periodic_boundary_conditions.empty()) {
// Make sure all periodic boundary conditions are in range.
DRAKE_DEMAND(
*options.state_indices_with_periodic_boundary_conditions.begin() >= 0);
DRAKE_DEMAND(
*options.state_indices_with_periodic_boundary_conditions.rbegin() <
context.get_continuous_state().size());
}

// TODO(russt): check that the system is time-invariant.

math::BarycentricMesh<double> state_mesh(state_grid);
math::BarycentricMesh<double> input_mesh(input_grid);

const int kNumStates = state_mesh.get_num_mesh_points();
const int kNumInputs = input_mesh.get_num_mesh_points();
const int kNumIndices = state_mesh.get_num_interpolants();

std::vector<Eigen::MatrixXi> Tind(kNumInputs);
std::vector<Eigen::MatrixXd> T(kNumInputs);
std::vector<Eigen::RowVectorXd> cost(kNumInputs);

{ // Build transition matrices.
std::cout << "Computing transition and cost matrices";
auto& sim_state = context.get_mutable_continuous_state_vector();

Eigen::VectorXd input_vec(input_mesh.get_input_size());
Eigen::VectorXd state_vec(state_mesh.get_input_size());

Eigen::VectorXi Tind_tmp(kNumIndices);
Eigen::VectorXd T_tmp(kNumIndices);

for (int input = 0; input < kNumInputs; input++) {
std::cout << ".";
Tind[input].resize(kNumIndices, kNumStates);
T[input].resize(kNumIndices, kNumStates);
cost[input].resize(kNumStates);

input_mesh.get_mesh_point(input, &input_vec);
context.FixInputPort(0, input_vec);

for (int state = 0; state < kNumStates; state++) {
context.set_time(0.0);
sim_state.SetFromVector(state_mesh.get_mesh_point(state));

cost[input](state) = timestep * cost_function(context);

simulator->StepTo(timestep);
state_vec = sim_state.CopyToVector();

for (int dim :
options.state_indices_with_periodic_boundary_conditions) {
const double lower = *state_grid[dim].begin();
const double upper = *state_grid[dim].rbegin();
state_vec[dim] =
std::fmod(state_vec[dim] - lower, upper - lower) + lower;
}

state_mesh.EvalBarycentricWeights(state_vec, &Tind_tmp, &T_tmp);
Tind[input].col(state) = Tind_tmp;
T[input].col(state) = T_tmp;
}
}
std::cout << "done." << std::endl;
}

// Perform value iteration loop
Eigen::RowVectorXd J = Eigen::RowVectorXd::Zero(kNumStates);
Eigen::RowVectorXd Jnext(kNumStates);
Eigen::RowVectorXi Pi(kNumStates);

double max_diff = std::numeric_limits<double>::infinity();
while (max_diff > options.convergence_tol) {
for (int state = 0; state < kNumStates; state++) {
Jnext(state) = std::numeric_limits<double>::infinity();

for (int input = 0; input < kNumInputs; input++) {
double Jinput = cost[input](state);
for (int index = 0; index < kNumIndices; index++) {
Jinput += options.discount_factor * T[input](index, state) *
J(Tind[input](index, state));
}
if (Jinput < Jnext(state)) {
Jnext(state) = Jinput;
Pi(state) = input;
}
}
}
max_diff = (J - Jnext).lpNorm<Eigen::Infinity>();
J = Jnext;
// std::cout << "J = " << J << std::endl;
// std::cout << "Pi = " << Pi << std::endl;
}

// Create the policy.
Eigen::MatrixXd policy_values(input_mesh.get_input_size(), kNumStates);
for (int state = 0; state < kNumStates; state++) {
policy_values.col(state) = input_mesh.get_mesh_point(Pi(state));
}
auto policy = std::make_unique<BarycentricMeshSystem<double>>(state_mesh,
policy_values);
return std::make_pair(std::move(policy), J);
}

} // namespace controllers
} // namespace systems
} // namespace drake
75 changes: 75 additions & 0 deletions systems/controllers/dynamic_programming.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once

#include <memory>
#include <set>
#include <utility>

#include "drake/math/barycentric.h"
#include "drake/systems/analysis/simulator.h"
#include "drake/systems/framework/vector_system.h"
#include "drake/systems/primitives/barycentric_system.h"

namespace drake {
namespace systems {
namespace controllers {

/// Consolidates the many possible options to be passed to the dynamic
/// programming algorithms.
struct DynamicProgrammingOptions {
double discount_factor{1.};
std::set<int> state_indices_with_periodic_boundary_conditions;
double convergence_tol = 1e-4;
// TODO(russt): Add visualization callback
};

/// Implements Fitted Value Iteration on a (triangulated) Barycentric Mesh, as
/// described in
/// http://underactuated.csail.mit.edu/underactuated.html?chapter=dp .
/// It currently requires that the system to be optimized has only continuous
/// state and it is assumed to be time invariant. This code makes a
/// discrete-time approximation (using @p timestep) for the value iteration
/// update.
///
/// @param simulator contains the reference to the System being optimized and to
/// a Context for that system, which may contain non-default Parameters, etc.
/// The @p simulator is run for @p timestep seconds from every point on the mesh
/// in order to approximate the dynamics.. all of the simulation parameters
/// (integrator, etc) are relevant during that evaluation.
///
/// @param cost_function is the instantaneous cost (referred to as g(x,u) in the
/// notes. The cost-to-go is incremented by g(x,u)*timestep on each step.
/// @param state_grid defines the mesh on the state space used to represent
/// the cost-to-go function and the resulting policy.
/// @param input_grid defines the discrete action space used in the value
/// iteraiton update.
/// @param timestep a time in seconds used for the discrete-time approximation.
/// @param options optional DynamicProgrammingOptions structure.
///
/// @return a std::pair containing the resulting policy, implemented as a
/// BarycentricMeshSystem, and the MatrixXd J that defines the expected
/// cost-to-go on a BarycentricMesh using @p state_grid. The policy has a
/// single vector input (which is the continuous state of the system passed
/// in through @p simulator) and a single vector output (which is the input
/// of the system passed in through @p simulator).
///
std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd>
FittedValueIteration(
Simulator<double>* simulator, // has system and context, as well
// as integrator params, etc.
const std::function<double(const Context<double>& context)>& cost_function,
const math::BarycentricMesh<double>::MeshGrid& state_grid,
const math::BarycentricMesh<double>::MeshGrid& input_grid,
const double timestep,
const DynamicProgrammingOptions& options = DynamicProgrammingOptions());

// TODO(russt): Handle the specific case where system is control affine and the
// cost function is quadratic positive-definite. (Adds requirements on the
// system and cost function (e.g. autodiff/symbolic), and doesn't need the
// input_grid argument).

// TODO(russt): Implement more general FittedValueIteration methods as the
// function approximation tools become available.

} // namespace controllers
} // namespace systems
} // namespace drake
Loading

0 comments on commit e684a6b

Please sign in to comment.