Skip to content

Commit

Permalink
controllers: add python bindings for finite-horizon LQR
Browse files Browse the repository at this point in the history
as discussed on slack, this uses a work-around to binding the std::unique_ptr member variables in FiniteLQRResult.

also includes a few small improvements to finite-horizon LQR, in preparation for the supporting discrete-time systems.
  • Loading branch information
RussTedrake committed Apr 29, 2020
1 parent 0ab3801 commit 92cc65e
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 35 deletions.
11 changes: 11 additions & 0 deletions bindings/pydrake/common/test/wrap_pybind_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pydrake.common.wrap_test_util import (
MyContainer,
MyUniquePtr,
MyValue,
)

Expand All @@ -23,3 +24,13 @@ def test_read_write_keep_alive(self):
gc.collect()
# Ensure that we have not lost our value.
self.assertTrue(c.member.value == 7.)

def test_unique_ptr_keep_alive(self):
u = MyUniquePtr(42)
self.assertEqual(u.member, 42)
val = u.member
# Ensure that the getter keeps the container alive.
del u
gc.collect()
# Ensure that we have not lost our value.
self.assertEqual(val, 42)
13 changes: 12 additions & 1 deletion bindings/pydrake/common/test/wrap_test_util_py.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ struct MyContainer {
const MyValue* member{nullptr};
};

struct MyUniquePtr {
explicit MyUniquePtr(double val) : member(new double(val)) {}
std::unique_ptr<double> member;
};

} // namespace

PYBIND11_MODULE(wrap_test_util, m) {
Expand All @@ -27,7 +32,13 @@ PYBIND11_MODULE(wrap_test_util, m) {
py::class_<MyContainer> my_container(m, "MyContainer");
my_container // BR
.def(py::init<>());
DefReadWriteKeepAlive(&my_container, "member", &MyContainer::member);
DefReadWriteKeepAlive(
&my_container, "member", &MyContainer::member, "MyContainer doc");

py::class_<MyUniquePtr> my_unique(m, "MyUniquePtr");
my_unique.def(py::init<double>());
DefReadUniquePtr(
&my_unique, "member", &MyUniquePtr::member, "MyUniquePtr doc");
}

} // namespace pydrake
Expand Down
23 changes: 21 additions & 2 deletions bindings/pydrake/common/wrap_pybind.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma once

#include <memory>
#include <string>
#include <utility>

Expand Down Expand Up @@ -68,14 +69,32 @@ auto WrapCallbacks(Func&& func) {
/// @tparam Class the C++ class.
/// @tparam T type for the member we wish to apply keep alive semantics.
template <typename PyClass, typename Class, typename T>
void DefReadWriteKeepAlive(PyClass* cls, const char* name, T Class::*member) {
void DefReadWriteKeepAlive(
PyClass* cls, const char* name, T Class::*member, const char* doc = "") {
auto getter = [member](const Class* obj) { return obj->*member; };
auto setter = [member](Class* obj, const T& value) { obj->*member = value; };
cls->def_property(name, // BR
py::cpp_function(getter),
py::cpp_function(setter,
// Keep alive, reference: `self` keeps `value` alive.
py::keep_alive<1, 2>()));
py::keep_alive<1, 2>()),
doc);
}

/// Idempotent to pybind11's `def_readonly()`, which works for unique_ptr
/// elements; the getter is protected with keep_alive on a `member` variable
/// that is a unique_ptr.
///
/// @tparam PyClass the python class.
/// @tparam Class the C++ class.
/// @tparam T type for the member we wish to apply keep alive semantics.
template <typename PyClass, typename Class, typename T>
void DefReadUniquePtr(PyClass* cls, const char* name,
const std::unique_ptr<T> Class::*member, const char* doc = "") {
auto getter = py::cpp_function(
[member](const Class* obj) { return (obj->*member).get(); },
py_reference_internal);
cls->def_property_readonly(name, getter, doc);
}

} // namespace pydrake
Expand Down
44 changes: 44 additions & 0 deletions bindings/pydrake/systems/controllers_py.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "drake/bindings/pydrake/symbolic_types_pybind.h"
#include "drake/multibody/plant/multibody_plant.h"
#include "drake/systems/controllers/dynamic_programming.h"
#include "drake/systems/controllers/finite_horizon_linear_quadratic_regulator.h"
#include "drake/systems/controllers/inverse_dynamics.h"
#include "drake/systems/controllers/inverse_dynamics_controller.h"
#include "drake/systems/controllers/linear_quadratic_regulator.h"
Expand All @@ -32,6 +33,7 @@ PYBIND11_MODULE(controllers, m) {
py::module::import("pydrake.symbolic");
py::module::import("pydrake.systems.framework");
py::module::import("pydrake.systems.primitives");
py::module::import("pydrake.trajectories");

py::class_<DynamicProgrammingOptions::PeriodicBoundaryCondition>(m,
"PeriodicBoundaryCondition",
Expand Down Expand Up @@ -242,6 +244,48 @@ PYBIND11_MODULE(controllers, m) {
py::arg("system"), py::arg("context"), py::arg("Q"), py::arg("R"),
py::arg("N") = Eigen::Matrix<double, 0, 0>::Zero(),
py::arg("input_port_index") = 0, doc.LinearQuadraticRegulator.doc_6args);

py::class_<FiniteHorizonLinearQuadraticRegulatorOptions> fhlqr_options(m,
"FiniteHorizonLinearQuadraticRegulatorOptions",
doc.FiniteHorizonLinearQuadraticRegulatorOptions.doc);
fhlqr_options
.def(py::init<>(),
doc.FiniteHorizonLinearQuadraticRegulatorOptions.ctor.doc)
.def_readwrite("Qf", &FiniteHorizonLinearQuadraticRegulatorOptions::Qf,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.Qf.doc)
.def_readwrite("input_port_index",
&FiniteHorizonLinearQuadraticRegulatorOptions::input_port_index,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.input_port_index
.doc);
DefReadWriteKeepAlive(&fhlqr_options, "x0",
&FiniteHorizonLinearQuadraticRegulatorOptions::x0,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.x0.doc);
DefReadWriteKeepAlive(&fhlqr_options, "u0",
&FiniteHorizonLinearQuadraticRegulatorOptions::u0,
doc.FiniteHorizonLinearQuadraticRegulatorOptions.u0.doc);

py::class_<FiniteHorizonLinearQuadraticRegulatorResult> fhlqr_result(m,
"FiniteHorizonLinearQuadraticRegulatorResult",
doc.FiniteHorizonLinearQuadraticRegulatorResult.doc);
DefReadUniquePtr(&fhlqr_result, "x0",
&FiniteHorizonLinearQuadraticRegulatorResult::x0,
doc.FiniteHorizonLinearQuadraticRegulatorResult.x0.doc);
DefReadUniquePtr(&fhlqr_result, "u0",
&FiniteHorizonLinearQuadraticRegulatorResult::u0,
doc.FiniteHorizonLinearQuadraticRegulatorResult.u0.doc);
DefReadUniquePtr(&fhlqr_result, "K",
&FiniteHorizonLinearQuadraticRegulatorResult::K,
doc.FiniteHorizonLinearQuadraticRegulatorResult.K.doc);
DefReadUniquePtr(&fhlqr_result, "S",
&FiniteHorizonLinearQuadraticRegulatorResult::S,
doc.FiniteHorizonLinearQuadraticRegulatorResult.S.doc);

m.def("FiniteHorizonLinearQuadraticRegulator",
&FiniteHorizonLinearQuadraticRegulator, py::arg("system"),
py::arg("context"), py::arg("t0"), py::arg("tf"), py::arg("Q"),
py::arg("R"),
py::arg("options") = FiniteHorizonLinearQuadraticRegulatorOptions(),
doc.FiniteHorizonLinearQuadraticRegulator.doc);
}

} // namespace pydrake
Expand Down
55 changes: 52 additions & 3 deletions bindings/pydrake/systems/test/controllers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,23 @@
from pydrake.multibody.parsing import Parser
from pydrake.systems.analysis import Simulator
from pydrake.systems.controllers import (
DiscreteTimeLinearQuadraticRegulator, DynamicProgrammingOptions,
DiscreteTimeLinearQuadraticRegulator,
DynamicProgrammingOptions,
FiniteHorizonLinearQuadraticRegulator,
FiniteHorizonLinearQuadraticRegulatorOptions,
FiniteHorizonLinearQuadraticRegulatorResult,
FittedValueIteration,
InverseDynamicsController, InverseDynamics,
InverseDynamicsController,
InverseDynamics,
LinearQuadraticRegulator,
LinearProgrammingApproximateDynamicProgramming,
PeriodicBoundaryCondition, PidControlledSystem, PidController
PeriodicBoundaryCondition,
PidControlledSystem,
PidController,
)
from pydrake.systems.framework import InputPortSelection
from pydrake.systems.primitives import Integrator, LinearSystem
from pydrake.trajectories import Trajectory


class TestControllers(unittest.TestCase):
Expand Down Expand Up @@ -260,3 +268,44 @@ def test_discrete_time_linear_quadratic_regulator(self):
(K, S) = DiscreteTimeLinearQuadraticRegulator(A, B, Q, R)
self.assertEqual(K.shape, (1, 2))
self.assertEqual(S.shape, (2, 2))

def test_finite_horizon_linear_quadratic_regulator(self):
A = np.array([[0, 1], [0, 0]])
B = np.array([[0], [1]])
C = np.identity(2)
D = np.array([[0], [0]])
double_integrator = LinearSystem(A, B, C, D)

Q = np.identity(2)
R = np.identity(1)

options = FiniteHorizonLinearQuadraticRegulatorOptions()
options.Qf = Q
self.assertIsNone(options.x0)
self.assertIsNone(options.u0)
self.assertEqual(options.input_port_index,
InputPortSelection.kUseFirstInputIfItExists)

context = double_integrator.CreateDefaultContext()
context.FixInputPort(0, [0.0])

result = FiniteHorizonLinearQuadraticRegulator(
system=double_integrator,
context=context,
t0=0,
tf=0.1,
Q=Q,
R=R,
options=options)

self.assertIsInstance(result,
FiniteHorizonLinearQuadraticRegulatorResult)

self.assertIsInstance(result.x0, Trajectory)
self.assertEqual(result.x0.value(0).shape, (2, 1))
self.assertIsInstance(result.u0, Trajectory)
self.assertEqual(result.u0.value(0).shape, (1, 1))
self.assertIsInstance(result.K, Trajectory)
self.assertEqual(result.K.value(0).shape, (1, 2))
self.assertIsInstance(result.S, Trajectory)
self.assertEqual(result.S.value(0).shape, (2, 2))
30 changes: 19 additions & 11 deletions systems/controllers/finite_horizon_linear_quadratic_regulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>

#include "drake/common/drake_assert.h"
#include "drake/common/is_approx_equal_abstol.h"
#include "drake/math/autodiff.h"
#include "drake/math/autodiff_gradient.h"
#include "drake/math/matrix_util.h"
Expand All @@ -31,8 +32,7 @@ class RiccatiSystem : public LeafSystem<double> {
RiccatiSystem(const System<double>& system, const Context<double>& context,
const Eigen::Ref<const Eigen::MatrixXd>& Q,
const Eigen::Ref<const Eigen::MatrixXd>& R,
const Trajectory<double>& x0,
const Trajectory<double>& u0,
const Trajectory<double>& x0, const Trajectory<double>& u0,
const FiniteHorizonLinearQuadraticRegulatorOptions& options)
: system_(System<double>::ToAutoDiffXd(system)),
input_port_(
Expand Down Expand Up @@ -81,15 +81,18 @@ class RiccatiSystem : public LeafSystem<double> {
context_->SetTime(system_time);

// Get (time-varying) linearization of the plant.
auto autodiff_args = math::initializeAutoDiffTuple(
x0_.value(system_time), u0_.value(system_time));
auto autodiff_args = math::initializeAutoDiffTuple(x0_.value(system_time),
u0_.value(system_time));
context_->SetContinuousState(std::get<0>(autodiff_args));
input_port_->FixValue(context_.get(), std::get<1>(autodiff_args));

const VectorX<AutoDiffXd> autodiff_xdot0 =
system_->EvalTimeDerivatives(*context_).CopyToVector();

const Eigen::VectorXd xdot0 = math::autoDiffToValueMatrix(autodiff_xdot0);
// Note: This version assumes that the nominal trajectory is a feasible
// trajectory of the system.
// TODO(russt): Implement the affine terms.

const Eigen::MatrixXd AB = math::autoDiffToGradientMatrix(autodiff_xdot0);
const Eigen::Ref<const Eigen::MatrixXd>& A = AB.leftCols(num_states_);
const Eigen::Ref<const Eigen::MatrixXd>& B = AB.rightCols(num_inputs_);
Expand Down Expand Up @@ -170,7 +173,7 @@ FiniteHorizonLinearQuadraticRegulator(
DRAKE_DEMAND(system.num_input_ports() > 0);
DRAKE_DEMAND(tf > t0);
const int num_states = context.num_total_states();
std::unique_ptr<Trajectory<double>> x0;
std::unique_ptr<PiecewisePolynomial<double>> x0;
if (options.x0) {
DRAKE_DEMAND(options.x0->start_time() <= t0);
DRAKE_DEMAND(options.x0->end_time() >= tf);
Expand All @@ -180,7 +183,7 @@ FiniteHorizonLinearQuadraticRegulator(
context.get_continuous_state_vector().CopyToVector());
}

std::unique_ptr<Trajectory<double>> u0;
std::unique_ptr<PiecewisePolynomial<double>> u0;
if (options.u0) {
DRAKE_DEMAND(options.u0->start_time() <= t0);
DRAKE_DEMAND(options.u0->end_time() >= tf);
Expand Down Expand Up @@ -218,10 +221,15 @@ FiniteHorizonLinearQuadraticRegulator(
simulator.AdvanceTo(-t0);

FiniteHorizonLinearQuadraticRegulatorResult result;
result.S = std::move(*(integrator.StopDenseIntegration()));
result.S.ReverseTime();
result.S.Reshape(num_states, num_states);
result.K = riccati.MakeKTrajectory(result.S);
std::unique_ptr<PiecewisePolynomial<double>> S =
integrator.StopDenseIntegration();
S->ReverseTime();
S->Reshape(num_states, num_states);
result.K = std::make_unique<PiecewisePolynomial<double>>(
riccati.MakeKTrajectory(*S));
result.x0 = options.x0 ? options.x0->Clone() : std::move(x0);
result.u0 = options.u0 ? options.u0->Clone() : std::move(u0);
result.S = std::move(S);

return result;
}
Expand Down
16 changes: 10 additions & 6 deletions systems/controllers/finite_horizon_linear_quadratic_regulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ A structure to facilitate passing the myriad of optional arguments to the
FiniteHorizonLinearQuadraticRegulator algorithms.
*/
struct FiniteHorizonLinearQuadraticRegulatorOptions {
FiniteHorizonLinearQuadraticRegulatorOptions() = default;

/**
A num_states x num_states positive semi-definite matrix which specified the
cost at the final time. If unset, then Qf will be set to the zero matrix.
Expand Down Expand Up @@ -52,8 +54,10 @@ results. The finite-horizon cost-to-go is given by (x-x0(t))'*S(t)*(x-x0(t)) and
the optimal controller is given by u-u0(t) = -K(t)*(x-x0(t)).
*/
struct FiniteHorizonLinearQuadraticRegulatorResult {
trajectories::PiecewisePolynomial<double> K;
trajectories::PiecewisePolynomial<double> S;
std::unique_ptr<trajectories::Trajectory<double>> x0;
std::unique_ptr<trajectories::Trajectory<double>> u0;
std::unique_ptr<trajectories::Trajectory<double>> K;
std::unique_ptr<trajectories::Trajectory<double>> S;
};

// TODO(russt): Add support for difference-equation systems.
Expand All @@ -73,10 +77,10 @@ optimal cost-to-go for the finite-horizon linear quadratic regulator:
@f]
where A(t), B(t), and c(t) are taken from the gradients of the continuous-time
dynamics ẋ = f(t,x,u), as A(t) = dfdx(t, x0(t), u0(t)), B(t) = dfdu(t,
x0(t), u0(t)), and c(t) = f(t, x0(t), u0(t)). x0(t) and u0(t) can be specified
in @p options, otherwise are taken to be constant trajectories with values
given by @p context.
dynamics ẋ = f(t,x,u), as A(t) = dfdx(t, x0(t), u0(t)), B(t) = dfdu(t, x0(t),
u0(t)), and c(t) = f(t, x0(t), u0(t)). x0(t) and u0(t) can be specified in @p
options, otherwise are taken to be constant trajectories with values given by @p
context. The current implementation assumes that ẋ0(t) = f(t, x0(t), u0(t)).
@param system a System<double> representing the plant.
@param context a Context<double> used to pass the default input, state, and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,26 @@ GTEST_TEST(FiniteHorizonLQRTest, InfiniteHorizonTest) {
FiniteHorizonLinearQuadraticRegulatorResult result =
FiniteHorizonLinearQuadraticRegulator(sys, *context, t0, tf, Q, R,
options);
EXPECT_TRUE(result.S.start_time() == t0);
EXPECT_TRUE(result.S.end_time() == tf);
EXPECT_TRUE(result.S->start_time() == t0);
EXPECT_TRUE(result.S->end_time() == tf);
// Confirm that it's initialized to zero.
EXPECT_TRUE(result.S.value(tf).isZero(1e-12));
EXPECT_TRUE(result.S->value(tf).isZero(1e-12));
// Confirm that it converges to the solution.
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-5));
EXPECT_TRUE(result.K.start_time() == t0);
EXPECT_TRUE(result.K.end_time() == tf);
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-5));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-5));
EXPECT_TRUE(result.K->start_time() == t0);
EXPECT_TRUE(result.K->end_time() == tf);
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-5));
EXPECT_TRUE(CompareMatrices(result.x0->value(t0), Eigen::Vector2d::Zero()));
EXPECT_TRUE(CompareMatrices(result.x0->value(tf), Eigen::Vector2d::Zero()));
EXPECT_TRUE(CompareMatrices(result.u0->value(t0), Vector1d::Zero()));
EXPECT_TRUE(CompareMatrices(result.u0->value(tf), Vector1d::Zero()));

// Test that it stays at the fixed-point if initialized at the fixed point.
options.Qf = lqr_result.S;
result = FiniteHorizonLinearQuadraticRegulator(sys, *context, t0, t0 + 0.1, Q,
R, options);
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-12));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-12));
}

// Verify that we can stabilize a non-zero fixed-point specified via the
Expand Down Expand Up @@ -97,9 +101,13 @@ GTEST_TEST(FiniteHorizonLQRTest, NominalTrajectoryTest) {
FiniteHorizonLinearQuadraticRegulator(*system, *context, t0, tf,
Q, R, options);
EXPECT_TRUE(
CompareMatrices(result.S.value(t0), result.S.value(tf), 1e-12));
EXPECT_TRUE(CompareMatrices(result.S.value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K.value(t0), lqr_result.K, 1e-12));
CompareMatrices(result.S->value(t0), result.S->value(tf), 1e-12));
EXPECT_TRUE(CompareMatrices(result.S->value(t0), lqr_result.S, 1e-12));
EXPECT_TRUE(CompareMatrices(result.K->value(t0), lqr_result.K, 1e-12));
EXPECT_TRUE(CompareMatrices(result.x0->value(t0), x0v));
EXPECT_TRUE(CompareMatrices(result.x0->value(tf), x0v));
EXPECT_TRUE(CompareMatrices(result.u0->value(t0), u0v));
EXPECT_TRUE(CompareMatrices(result.u0->value(tf), u0v));
}
}

Expand Down

0 comments on commit 92cc65e

Please sign in to comment.