Skip to content

Commit

Permalink
controllers: add python bindings for finite-horizon LQR
Browse files Browse the repository at this point in the history
as discussed on slack, this uses a work-around to binding the std::unique_ptr member variables in FiniteLQRResult.

also includes a few small improvements to finite-horizon LQR, in preparation for the supporting discrete-time systems.
  • Loading branch information
RussTedrake committed Apr 26, 2020
1 parent 0ab3801 commit 1f5395f
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 32 deletions.
79 changes: 79 additions & 0 deletions bindings/pydrake/systems/controllers_py.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "drake/bindings/pydrake/symbolic_types_pybind.h"
#include "drake/multibody/plant/multibody_plant.h"
#include "drake/systems/controllers/dynamic_programming.h"
#include "drake/systems/controllers/finite_horizon_linear_quadratic_regulator.h"
#include "drake/systems/controllers/inverse_dynamics.h"
#include "drake/systems/controllers/inverse_dynamics_controller.h"
#include "drake/systems/controllers/linear_quadratic_regulator.h"
Expand All @@ -32,6 +33,7 @@ PYBIND11_MODULE(controllers, m) {
py::module::import("pydrake.symbolic");
py::module::import("pydrake.systems.framework");
py::module::import("pydrake.systems.primitives");
py::module::import("pydrake.trajectories");

py::class_<DynamicProgrammingOptions::PeriodicBoundaryCondition>(m,
"PeriodicBoundaryCondition",
Expand Down Expand Up @@ -242,6 +244,83 @@ PYBIND11_MODULE(controllers, m) {
py::arg("system"), py::arg("context"), py::arg("Q"), py::arg("R"),
py::arg("N") = Eigen::Matrix<double, 0, 0>::Zero(),
py::arg("input_port_index") = 0, doc.LinearQuadraticRegulator.doc_6args);

py::class_<FiniteHorizonLinearQuadraticRegulatorOptions>(m,
"FiniteHorizonLinearQuadraticRegulatorOptions",
doc.FiniteHorizonLinearQuadraticRegulatorOptions.doc)
.def(py::init<>(),
doc.FiniteHorizonLinearQuadraticRegulatorOptions.ctor.doc)
.def_readwrite("Qf", &FiniteHorizonLinearQuadraticRegulatorOptions::Qf,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.Qf.doc)
.def_readwrite("x0", &FiniteHorizonLinearQuadraticRegulatorOptions::x0,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.x0.doc)
.def_readwrite("u0", &FiniteHorizonLinearQuadraticRegulatorOptions::u0,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.Qf.doc)
.def_readwrite("input_port_index",
&FiniteHorizonLinearQuadraticRegulatorOptions::input_port_index,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.input_port_index
.doc);

py::class_<FiniteHorizonLinearQuadraticRegulatorResult>(m,
"FiniteHorizonLinearQuadraticRegulatorResult",
doc.FiniteHorizonLinearQuadraticRegulatorResult.doc)
// Note: Use these as a workaround for .def_readwrite not working with
// unique_ptr (yet).
.def("x0",
[](FiniteHorizonLinearQuadraticRegulatorResult* self) {
return self->x0.get();
},
py_reference_internal,
doc.FiniteHorizonLinearQuadraticRegulatorResult.x0.doc)
.def("x0",
[](FiniteHorizonLinearQuadraticRegulatorResult* self, double t) {
DRAKE_DEMAND(self->x0 != nullptr);
return self->x0->value(t);
},
py::arg("t"), doc.FiniteHorizonLinearQuadraticRegulatorResult.x0.doc)
.def("u0",
[](FiniteHorizonLinearQuadraticRegulatorResult* self) {
return self->u0.get();
},
py_reference_internal,
doc.FiniteHorizonLinearQuadraticRegulatorResult.u0.doc)
.def("u0",
[](FiniteHorizonLinearQuadraticRegulatorResult* self, double t) {
DRAKE_DEMAND(self->u0 != nullptr);
return self->u0->value(t);
},
py::arg("t"), doc.FiniteHorizonLinearQuadraticRegulatorResult.u0.doc)
.def("K",
[](FiniteHorizonLinearQuadraticRegulatorResult* self) {
return self->K.get();
},
py_reference_internal,
doc.FiniteHorizonLinearQuadraticRegulatorResult.K.doc)
.def("K",
[](FiniteHorizonLinearQuadraticRegulatorResult* self, double t) {
DRAKE_DEMAND(self->K != nullptr);
return self->K->value(t);
},
py::arg("t"), doc.FiniteHorizonLinearQuadraticRegulatorResult.K.doc)
.def("S",
[](FiniteHorizonLinearQuadraticRegulatorResult* self) {
return self->S.get();
},
py_reference_internal,
doc.FiniteHorizonLinearQuadraticRegulatorResult.S.doc)
.def("S",
[](FiniteHorizonLinearQuadraticRegulatorResult* self, double t) {
DRAKE_DEMAND(self->S != nullptr);
return self->S->value(t);
},
py::arg("t"), doc.FiniteHorizonLinearQuadraticRegulatorResult.S.doc);

m.def("FiniteHorizonLinearQuadraticRegulator",
&FiniteHorizonLinearQuadraticRegulator, py::arg("system"),
py::arg("context"), py::arg("t0"), py::arg("tf"), py::arg("Q"),
py::arg("R"),
py::arg("options") = FiniteHorizonLinearQuadraticRegulatorOptions(),
doc.FiniteHorizonLinearQuadraticRegulator.doc);
}

} // namespace pydrake
Expand Down
57 changes: 54 additions & 3 deletions bindings/pydrake/systems/test/controllers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,23 @@
from pydrake.multibody.parsing import Parser
from pydrake.systems.analysis import Simulator
from pydrake.systems.controllers import (
DiscreteTimeLinearQuadraticRegulator, DynamicProgrammingOptions,
DiscreteTimeLinearQuadraticRegulator,
DynamicProgrammingOptions,
FiniteHorizonLinearQuadraticRegulator,
FiniteHorizonLinearQuadraticRegulatorOptions,
FiniteHorizonLinearQuadraticRegulatorResult,
FittedValueIteration,
InverseDynamicsController, InverseDynamics,
InverseDynamicsController,
InverseDynamics,
LinearQuadraticRegulator,
LinearProgrammingApproximateDynamicProgramming,
PeriodicBoundaryCondition, PidControlledSystem, PidController
PeriodicBoundaryCondition,
PidControlledSystem,
PidController,
)
from pydrake.systems.framework import InputPortSelection
from pydrake.systems.primitives import Integrator, LinearSystem
from pydrake.trajectories import Trajectory


class TestControllers(unittest.TestCase):
Expand Down Expand Up @@ -260,3 +268,46 @@ def test_discrete_time_linear_quadratic_regulator(self):
(K, S) = DiscreteTimeLinearQuadraticRegulator(A, B, Q, R)
self.assertEqual(K.shape, (1, 2))
self.assertEqual(S.shape, (2, 2))

def test_finite_horizon_linear_quadratic_regulator(self):
A = np.array([[0, 1], [0, 0]])
B = np.array([[0], [1]])
C = np.identity(2)
D = np.array([[0], [0]])
double_integrator = LinearSystem(A, B, C, D)

Q = np.identity(2)
R = np.identity(1)

options = FiniteHorizonLinearQuadraticRegulatorOptions()
options.Qf = Q
self.assertIsNone(options.x0)
self.assertIsNone(options.u0)
self.assertEqual(options.input_port_index,
InputPortSelection.kUseFirstInputIfItExists)

context = double_integrator.CreateDefaultContext()
context.FixInputPort(0, [0.0])

result = FiniteHorizonLinearQuadraticRegulator(
system=double_integrator,
context=context,
t0=0,
tf=0.1,
Q=Q,
R=R,
options=options)

self.assertIsInstance(result,
FiniteHorizonLinearQuadraticRegulatorResult)

self.assertIsInstance(result.x0(), Trajectory)
self.assertEqual(result.x0(0).shape, (2, 1))
self.assertIsInstance(result.u0(), Trajectory)
self.assertEqual(result.u0(0).shape, (1, 1))
self.assertIsInstance(result.K(), Trajectory)
self.assertEqual(result.K().value(0).shape, (1, 2))
self.assertEqual(result.K(0).shape, (1, 2))
self.assertIsInstance(result.S(), Trajectory)
self.assertEqual(result.S().value(0).shape, (2, 2))
self.assertEqual(result.S(0).shape, (2, 2))
32 changes: 21 additions & 11 deletions systems/controllers/finite_horizon_linear_quadratic_regulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>

#include "drake/common/drake_assert.h"
#include "drake/common/is_approx_equal_abstol.h"
#include "drake/math/autodiff.h"
#include "drake/math/autodiff_gradient.h"
#include "drake/math/matrix_util.h"
Expand All @@ -31,8 +32,7 @@ class RiccatiSystem : public LeafSystem<double> {
RiccatiSystem(const System<double>& system, const Context<double>& context,
const Eigen::Ref<const Eigen::MatrixXd>& Q,
const Eigen::Ref<const Eigen::MatrixXd>& R,
const Trajectory<double>& x0,
const Trajectory<double>& u0,
const Trajectory<double>& x0, const Trajectory<double>& u0,
const FiniteHorizonLinearQuadraticRegulatorOptions& options)
: system_(System<double>::ToAutoDiffXd(system)),
input_port_(
Expand Down Expand Up @@ -81,15 +81,20 @@ class RiccatiSystem : public LeafSystem<double> {
context_->SetTime(system_time);

// Get (time-varying) linearization of the plant.
auto autodiff_args = math::initializeAutoDiffTuple(
x0_.value(system_time), u0_.value(system_time));
auto autodiff_args = math::initializeAutoDiffTuple(x0_.value(system_time),
u0_.value(system_time));
context_->SetContinuousState(std::get<0>(autodiff_args));
input_port_->FixValue(context_.get(), std::get<1>(autodiff_args));

const VectorX<AutoDiffXd> autodiff_xdot0 =
system_->EvalTimeDerivatives(*context_).CopyToVector();

const Eigen::VectorXd xdot0 = math::autoDiffToValueMatrix(autodiff_xdot0);
// Note: This version assumes that the nominal trajectory is a feasible
// trajectory of the system.
// TODO(russt): Implement the affine terms, or add a check here to make sure
// that math::autoDiffToValueMatrix(autodiff_xdot0) ~=
// x0_.EvalDerivative(system_time). Implementing he check requires #13144.

const Eigen::MatrixXd AB = math::autoDiffToGradientMatrix(autodiff_xdot0);
const Eigen::Ref<const Eigen::MatrixXd>& A = AB.leftCols(num_states_);
const Eigen::Ref<const Eigen::MatrixXd>& B = AB.rightCols(num_inputs_);
Expand Down Expand Up @@ -170,7 +175,7 @@ FiniteHorizonLinearQuadraticRegulator(
DRAKE_DEMAND(system.num_input_ports() > 0);
DRAKE_DEMAND(tf > t0);
const int num_states = context.num_total_states();
std::unique_ptr<Trajectory<double>> x0;
std::unique_ptr<PiecewisePolynomial<double>> x0;
if (options.x0) {
DRAKE_DEMAND(options.x0->start_time() <= t0);
DRAKE_DEMAND(options.x0->end_time() >= tf);
Expand All @@ -180,7 +185,7 @@ FiniteHorizonLinearQuadraticRegulator(
context.get_continuous_state_vector().CopyToVector());
}

std::unique_ptr<Trajectory<double>> u0;
std::unique_ptr<PiecewisePolynomial<double>> u0;
if (options.u0) {
DRAKE_DEMAND(options.u0->start_time() <= t0);
DRAKE_DEMAND(options.u0->end_time() >= tf);
Expand Down Expand Up @@ -218,10 +223,15 @@ FiniteHorizonLinearQuadraticRegulator(
simulator.AdvanceTo(-t0);

FiniteHorizonLinearQuadraticRegulatorResult result;
result.S = std::move(*(integrator.StopDenseIntegration()));
result.S.ReverseTime();
result.S.Reshape(num_states, num_states);
result.K = riccati.MakeKTrajectory(result.S);
std::unique_ptr<PiecewisePolynomial<double>> S =
integrator.StopDenseIntegration();
S->ReverseTime();
S->Reshape(num_states, num_states);
result.K = std::make_unique<PiecewisePolynomial<double>>(
riccati.MakeKTrajectory(*S));
result.x0 = options.x0 ? options.x0->Clone() : std::move(x0);
result.u0 = options.u0 ? options.u0->Clone() : std::move(u0);
result.S = std::move(S);

return result;
}
Expand Down
16 changes: 10 additions & 6 deletions systems/controllers/finite_horizon_linear_quadratic_regulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ A structure to facilitate passing the myriad of optional arguments to the
FiniteHorizonLinearQuadraticRegulator algorithms.
*/
struct FiniteHorizonLinearQuadraticRegulatorOptions {
FiniteHorizonLinearQuadraticRegulatorOptions() = default;

/**
A num_states x num_states positive semi-definite matrix which specified the
cost at the final time. If unset, then Qf will be set to the zero matrix.
Expand Down Expand Up @@ -52,8 +54,10 @@ results. The finite-horizon cost-to-go is given by (x-x0(t))'*S(t)*(x-x0(t)) and
the optimal controller is given by u-u0(t) = -K(t)*(x-x0(t)).
*/
struct FiniteHorizonLinearQuadraticRegulatorResult {
trajectories::PiecewisePolynomial<double> K;
trajectories::PiecewisePolynomial<double> S;
std::unique_ptr<trajectories::Trajectory<double>> x0;
std::unique_ptr<trajectories::Trajectory<double>> u0;
std::unique_ptr<trajectories::Trajectory<double>> K;
std::unique_ptr<trajectories::Trajectory<double>> S;
};

// TODO(russt): Add support for difference-equation systems.
Expand All @@ -73,10 +77,10 @@ optimal cost-to-go for the finite-horizon linear quadratic regulator:
@f]
where A(t), B(t), and c(t) are taken from the gradients of the continuous-time
dynamics ẋ = f(t,x,u), as A(t) = dfdx(t, x0(t), u0(t)), B(t) = dfdu(t,
x0(t), u0(t)), and c(t) = f(t, x0(t), u0(t)). x0(t) and u0(t) can be specified
in @p options, otherwise are taken to be constant trajectories with values
given by @p context.
dynamics ẋ = f(t,x,u), as A(t) = dfdx(t, x0(t), u0(t)), B(t) = dfdu(t, x0(t),
u0(t)), and c(t) = f(t, x0(t), u0(t)). x0(t) and u0(t) can be specified in @p
options, otherwise are taken to be constant trajectories with values given by @p
context. The current implementation assumes that ẋ0(t) = f(t, x0(t), u0(t)).
@param system a System<double> representing the plant.
@param context a Context<double> used to pass the default input, state, and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,26 @@ GTEST_TEST(FiniteHorizonLQRTest, InfiniteHorizonTest) {
FiniteHorizonLinearQuadraticRegulatorResult result =
FiniteHorizonLinearQuadraticRegulator(sys, *context, t0, tf, Q, R,
options);
EXPECT_TRUE(result.S.start_time() == t0);
EXPECT_TRUE(result.S.end_time() == tf);
EXPECT_TRUE(result.S->start_time() == t0);
EXPECT_TRUE(result.S->end_time() == tf);
// Confirm that it's initialized to zero.
EXPECT_TRUE(result.S.value(tf).isZero(1e-12));
EXPECT_TRUE(result.S->value(tf).isZero(1e-12));
// Confirm that it converges to the solution.
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-5));
EXPECT_TRUE(result.K.start_time() == t0);
EXPECT_TRUE(result.K.end_time() == tf);
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-5));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-5));
EXPECT_TRUE(result.K->start_time() == t0);
EXPECT_TRUE(result.K->end_time() == tf);
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-5));
EXPECT_TRUE(CompareMatrices(result.x0->value(t0), Eigen::Vector2d::Zero()));
EXPECT_TRUE(CompareMatrices(result.x0->value(tf), Eigen::Vector2d::Zero()));
EXPECT_TRUE(CompareMatrices(result.u0->value(t0), Vector1d::Zero()));
EXPECT_TRUE(CompareMatrices(result.u0->value(tf), Vector1d::Zero()));

// Test that it stays at the fixed-point if initialized at the fixed point.
options.Qf = lqr_result.S;
result = FiniteHorizonLinearQuadraticRegulator(sys, *context, t0, t0 + 0.1, Q,
R, options);
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-12));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-12));
}

// Verify that we can stabilize a non-zero fixed-point specified via the
Expand Down Expand Up @@ -97,9 +101,13 @@ GTEST_TEST(FiniteHorizonLQRTest, NominalTrajectoryTest) {
FiniteHorizonLinearQuadraticRegulator(*system, *context, t0, tf,
Q, R, options);
EXPECT_TRUE(
CompareMatrices(result.S.value(t0), result.S.value(tf), 1e-12));
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-12));
CompareMatrices(result.S->value(t0), result.S->value(tf), 1e-12));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-12));
EXPECT_TRUE(CompareMatrices(result.x0->value(t0), x0v));
EXPECT_TRUE(CompareMatrices(result.x0->value(tf), x0v));
EXPECT_TRUE(CompareMatrices(result.u0->value(t0), u0v));
EXPECT_TRUE(CompareMatrices(result.u0->value(tf), u0v));
}
}

Expand Down

0 comments on commit 1f5395f

Please sign in to comment.