implements fitted value iteration on a barycentric mesh

RussTedrake · Feb 11, 2018 · e684a6b · e684a6b
1 parent 7c29b62
commit e684a6b
Show file tree

Hide file tree

Showing 9 changed files with 470 additions and 1 deletion.
diff --git a/math/barycentric.cc b/math/barycentric.cc
@@ -46,6 +46,13 @@ void BarycentricMesh<T>::get_mesh_point(int index,
   DRAKE_DEMAND(index == 0);  // otherwise the index was out of range.
 }
 
+template <typename T>
+VectorX<T> BarycentricMesh<T>::get_mesh_point(int index) const {
+  VectorX<T> point(get_input_size());
+  get_mesh_point(index, &point);
+  return point;
+}
+
 template <typename T>
 void BarycentricMesh<T>::EvalBarycentricWeights(
     const Eigen::Ref<const VectorX<T>>& input,
@@ -153,6 +160,15 @@ void BarycentricMesh<T>::Eval(const Eigen::Ref<const MatrixX<T>>& mesh_values,
   }
 }
 
+template <typename T>
+VectorX<T> BarycentricMesh<T>::Eval(
+    const Eigen::Ref<const MatrixX<T>>& mesh_values,
+    const Eigen::Ref<const VectorX<T>>& input) const {
+  VectorX<T> output(mesh_values.rows());
+  Eval(mesh_values, input, &output);
+  return output;
+}
+
 template <typename T>
 MatrixX<T> BarycentricMesh<T>::MeshValuesFrom(
     const std::function<VectorX<T>(const Eigen::Ref<const VectorX<T>>&)>&

diff --git a/math/barycentric.h b/math/barycentric.h
@@ -75,6 +75,11 @@ class BarycentricMesh {
   /// @param point is set to the num_inputs-by-1 location of the mesh point.
   void get_mesh_point(int index, EigenPtr<Eigen::VectorXd> point) const;
 
+  /// Returns the position of a mesh point in the input space referenced by its
+  /// scalar index to @p point.
+  /// @param index must be in [0, get_num_mesh_points).
+  VectorX<T> get_mesh_point(int index) const;
+
   /// Writes the mesh indices used for interpolation to @p mesh_indices, and the
   /// interpolating coefficients to @p weights.  Inputs that are outside the
   /// bounding box of the input_grid are interpolated as though they were
@@ -108,6 +113,10 @@ class BarycentricMesh {
             const Eigen::Ref<const VectorX<T>>& input,
             EigenPtr<VectorX<T>> output) const;
 
+  /// Returns the function evaluated at @p input.
+  VectorX<T> Eval(const Eigen::Ref<const MatrixX<T>>& mesh_values,
+                  const Eigen::Ref<const VectorX<T>>& input) const;
+
   /// Evaluates @p vector_func at all input mesh points and extracts the mesh
   /// value matrix that should be used to approximate the function with this
   /// barycentric interpolation.

diff --git a/math/discrete_algebraic_riccati_equation.cc b/math/discrete_algebraic_riccati_equation.cc
@@ -303,7 +303,7 @@ void swap_block(Eigen::Ref<Eigen::MatrixXd> S, Eigen::Ref<Eigen::MatrixXd> T,
 // stable eigenvalue(s).
 // Push the block pointed by q to the position pointed by p.
 // Finish when n stable eigenvalues are placed at the top-left n by n matrix.
-// The algorithm for swaping blocks is described in the papers
+// The algorithm for swapping blocks is described in the papers
 // "A generalized eigenvalue approach for solving Riccati equations" by P. Van
 // Dooren, 1981, and "Numerical Methods for General and Structured Eigenvalue
 // Problems" by Daniel Kressner, 2005.

diff --git a/math/test/barycentric_test.cc b/math/test/barycentric_test.cc
@@ -41,6 +41,9 @@ GTEST_TEST(BarycentricTest, GetMeshPoints) {
   EXPECT_TRUE(CompareMatrices(point, Vector3d{0, 2, 4}));
   bary.get_mesh_point(3, &point);
   EXPECT_TRUE(CompareMatrices(point, Vector3d{1, 2, 4}));
+
+  // Test the alternative call signature.
+  EXPECT_TRUE(CompareMatrices(bary.get_mesh_point(0), Vector3d{0, 2, 3}));
 }
 
 GTEST_TEST(BarycentricTest, EvalWeights) {
@@ -121,6 +124,9 @@ GTEST_TEST(BarycentricTest, EvalTest) {
   mesh(0, 2) = 10.;
   bary.Eval(mesh, Vector2d{.25, .75}, &value);
   EXPECT_NEAR(value[0], 6.25, 1e-8);
+
+  // Test the alternative call signature.
+  EXPECT_NEAR(bary.Eval(mesh, Vector2d{.25, .75})[0], 6.25, 1e-8);
 }
 
 GTEST_TEST(BarycentricTest, MultidimensionalOutput) {

diff --git a/systems/controllers/BUILD.bazel b/systems/controllers/BUILD.bazel
@@ -14,6 +14,17 @@ drake_cc_library(
     ],
 )
 
+drake_cc_library(
+    name = "dynamic_programming",
+    srcs = ["dynamic_programming.cc"],
+    hdrs = ["dynamic_programming.h"],
+    deps = [
+        "//drake/systems/analysis:simulator",
+        "//drake/systems/framework",
+        "//drake/systems/primitives:barycentric_system",
+    ],
+)
+
 drake_cc_library(
     name = "inverse_dynamics",
     srcs = ["inverse_dynamics.cc"],
@@ -223,6 +234,18 @@ drake_cc_library(
     ],
 )
 
+drake_cc_googletest(
+    name = "dynamic_programming_test",
+    deps = [
+        ":dynamic_programming",
+        ":linear_quadratic_regulator",
+        "//drake/common/proto:call_matlab",
+        "//drake/common/test_utilities:eigen_matrix_compare",
+        "//drake/systems/primitives:integrator",
+        "//drake/systems/primitives:linear_system",
+    ],
+)
+
 drake_cc_googletest(
     name = "polynomial_encode_decode_test",
     deps = [

diff --git a/systems/controllers/dynamic_programming.cc b/systems/controllers/dynamic_programming.cc
@@ -0,0 +1,140 @@
+#include "drake/systems/controllers/dynamic_programming.h"
+
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "drake/systems/analysis/simulator.h"
+
+namespace drake {
+namespace systems {
+namespace controllers {
+
+std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd>
+FittedValueIteration(
+    Simulator<double>* simulator,
+    const std::function<double(const Context<double>& context)>& cost_function,
+    const math::BarycentricMesh<double>::MeshGrid& state_grid,
+    const math::BarycentricMesh<double>::MeshGrid& input_grid,
+    const double timestep, const DynamicProgrammingOptions& options) {
+  // TODO(russt): handle discrete state.
+  const auto& system = simulator->get_system();
+  auto& context = simulator->get_mutable_context();
+
+  DRAKE_DEMAND(context.has_only_continuous_state());
+  DRAKE_DEMAND(context.get_continuous_state().size() ==
+               static_cast<int>(state_grid.size()));
+
+  DRAKE_DEMAND(context.get_num_input_ports() == 1);
+  DRAKE_DEMAND(system.get_num_total_inputs() ==
+               static_cast<int>(input_grid.size()));
+
+  DRAKE_DEMAND(timestep > 0.);
+  DRAKE_DEMAND(options.discount_factor > 0. && options.discount_factor <= 1.);
+  if (!options.state_indices_with_periodic_boundary_conditions.empty()) {
+    // Make sure all periodic boundary conditions are in range.
+    DRAKE_DEMAND(
+        *options.state_indices_with_periodic_boundary_conditions.begin() >= 0);
+    DRAKE_DEMAND(
+        *options.state_indices_with_periodic_boundary_conditions.rbegin() <
+        context.get_continuous_state().size());
+  }
+
+  // TODO(russt): check that the system is time-invariant.
+
+  math::BarycentricMesh<double> state_mesh(state_grid);
+  math::BarycentricMesh<double> input_mesh(input_grid);
+
+  const int kNumStates = state_mesh.get_num_mesh_points();
+  const int kNumInputs = input_mesh.get_num_mesh_points();
+  const int kNumIndices = state_mesh.get_num_interpolants();
+
+  std::vector<Eigen::MatrixXi> Tind(kNumInputs);
+  std::vector<Eigen::MatrixXd> T(kNumInputs);
+  std::vector<Eigen::RowVectorXd> cost(kNumInputs);
+
+  {  // Build transition matrices.
+    std::cout << "Computing transition and cost matrices";
+    auto& sim_state = context.get_mutable_continuous_state_vector();
+
+    Eigen::VectorXd input_vec(input_mesh.get_input_size());
+    Eigen::VectorXd state_vec(state_mesh.get_input_size());
+
+    Eigen::VectorXi Tind_tmp(kNumIndices);
+    Eigen::VectorXd T_tmp(kNumIndices);
+
+    for (int input = 0; input < kNumInputs; input++) {
+      std::cout << ".";
+      Tind[input].resize(kNumIndices, kNumStates);
+      T[input].resize(kNumIndices, kNumStates);
+      cost[input].resize(kNumStates);
+
+      input_mesh.get_mesh_point(input, &input_vec);
+      context.FixInputPort(0, input_vec);
+
+      for (int state = 0; state < kNumStates; state++) {
+        context.set_time(0.0);
+        sim_state.SetFromVector(state_mesh.get_mesh_point(state));
+
+        cost[input](state) = timestep * cost_function(context);
+
+        simulator->StepTo(timestep);
+        state_vec = sim_state.CopyToVector();
+
+        for (int dim :
+             options.state_indices_with_periodic_boundary_conditions) {
+          const double lower = *state_grid[dim].begin();
+          const double upper = *state_grid[dim].rbegin();
+          state_vec[dim] =
+              std::fmod(state_vec[dim] - lower, upper - lower) + lower;
+        }
+
+        state_mesh.EvalBarycentricWeights(state_vec, &Tind_tmp, &T_tmp);
+        Tind[input].col(state) = Tind_tmp;
+        T[input].col(state) = T_tmp;
+      }
+    }
+    std::cout << "done." << std::endl;
+  }
+
+  // Perform value iteration loop
+  Eigen::RowVectorXd J = Eigen::RowVectorXd::Zero(kNumStates);
+  Eigen::RowVectorXd Jnext(kNumStates);
+  Eigen::RowVectorXi Pi(kNumStates);
+
+  double max_diff = std::numeric_limits<double>::infinity();
+  while (max_diff > options.convergence_tol) {
+    for (int state = 0; state < kNumStates; state++) {
+      Jnext(state) = std::numeric_limits<double>::infinity();
+
+      for (int input = 0; input < kNumInputs; input++) {
+        double Jinput = cost[input](state);
+        for (int index = 0; index < kNumIndices; index++) {
+          Jinput += options.discount_factor * T[input](index, state) *
+                    J(Tind[input](index, state));
+        }
+        if (Jinput < Jnext(state)) {
+          Jnext(state) = Jinput;
+          Pi(state) = input;
+        }
+      }
+    }
+    max_diff = (J - Jnext).lpNorm<Eigen::Infinity>();
+    J = Jnext;
+    //    std::cout << "J  = " << J << std::endl;
+    //    std::cout << "Pi = " << Pi << std::endl;
+  }
+
+  // Create the policy.
+  Eigen::MatrixXd policy_values(input_mesh.get_input_size(), kNumStates);
+  for (int state = 0; state < kNumStates; state++) {
+    policy_values.col(state) = input_mesh.get_mesh_point(Pi(state));
+  }
+  auto policy = std::make_unique<BarycentricMeshSystem<double>>(state_mesh,
+                                                                policy_values);
+  return std::make_pair(std::move(policy), J);
+}
+
+}  // namespace controllers
+}  // namespace systems
+}  // namespace drake
diff --git a/systems/controllers/dynamic_programming.h b/systems/controllers/dynamic_programming.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "drake/math/barycentric.h"
+#include "drake/systems/analysis/simulator.h"
+#include "drake/systems/framework/vector_system.h"
+#include "drake/systems/primitives/barycentric_system.h"
+
+namespace drake {
+namespace systems {
+namespace controllers {
+
+/// Consolidates the many possible options to be passed to the dynamic
+/// programming algorithms.
+struct DynamicProgrammingOptions {
+  double discount_factor{1.};
+  std::set<int> state_indices_with_periodic_boundary_conditions;
+  double convergence_tol = 1e-4;
+  // TODO(russt): Add visualization callback
+};
+
+/// Implements Fitted Value Iteration on a (triangulated) Barycentric Mesh, as
+/// described in
+/// http://underactuated.csail.mit.edu/underactuated.html?chapter=dp .
+/// It currently requires that the system to be optimized has only continuous
+/// state and it is assumed to be time invariant.  This code makes a
+/// discrete-time approximation (using @p timestep) for the value iteration
+/// update.
+///
+/// @param simulator contains the reference to the System being optimized and to
+/// a Context for that system, which may contain non-default Parameters, etc.
+/// The @p simulator is run for @p timestep seconds from every point on the mesh
+/// in order to approximate the dynamics.. all of the simulation parameters
+/// (integrator, etc) are relevant during that evaluation.
+///
+/// @param cost_function is the instantaneous cost (referred to as g(x,u) in the
+/// notes.  The cost-to-go is incremented by g(x,u)*timestep on each step.
+/// @param state_grid defines the mesh on the state space used to represent
+/// the cost-to-go function and the resulting policy.
+/// @param input_grid defines the discrete action space used in the value
+/// iteraiton update.
+/// @param timestep a time in seconds used for the discrete-time approximation.
+/// @param options optional DynamicProgrammingOptions structure.
+///
+/// @return a std::pair containing the resulting policy, implemented as a
+/// BarycentricMeshSystem, and the MatrixXd J that defines the expected
+/// cost-to-go on a BarycentricMesh using @p state_grid.  The policy has a
+/// single vector input (which is the continuous state of the system passed
+/// in through @p simulator) and a single vector output (which is the input
+/// of the system passed in through @p simulator).
+///
+std::pair<std::unique_ptr<BarycentricMeshSystem<double>>, Eigen::MatrixXd>
+FittedValueIteration(
+    Simulator<double>* simulator,  // has system and context, as well
+    // as integrator params, etc.
+    const std::function<double(const Context<double>& context)>& cost_function,
+    const math::BarycentricMesh<double>::MeshGrid& state_grid,
+    const math::BarycentricMesh<double>::MeshGrid& input_grid,
+    const double timestep,
+    const DynamicProgrammingOptions& options = DynamicProgrammingOptions());
+
+// TODO(russt): Handle the specific case where system is control affine and the
+// cost function is quadratic positive-definite.  (Adds requirements on the
+// system and cost function (e.g. autodiff/symbolic), and doesn't need the
+// input_grid argument).
+
+// TODO(russt): Implement more general FittedValueIteration methods as the
+// function approximation tools become available.
+
+}  // namespace controllers
+}  // namespace systems
+}  // namespace drake