Skip to content

Commit

Permalink
Adds ReexpressSpatialVector
Browse files Browse the repository at this point in the history
  • Loading branch information
sherm1 committed Dec 16, 2024
1 parent e39b09e commit 5adfeef
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 13 deletions.
14 changes: 14 additions & 0 deletions math/fast_pose_composition_functions.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include "drake/common/eigen_types.h"

namespace drake {
namespace math {

Expand Down Expand Up @@ -66,6 +68,18 @@ void ComposeXinvX(const RigidTransform<double>& X_BA,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC);

// TODO(sherm1) Consider making the signature take SpatialVector instead of
// Vector6.
/* Re-express a spatial vector via `S_A = R_AB * S_B`. A spatial vector is
a Vector6 composed of two 3-element Vector3's that are re-expressed
independently. It is OK if S_A is exactly the same memory as S_B, but not if
they partially overlap or overlap with R_AB.
This requires 30 floating point operations but can be done very efficiently
exploing SIMD instructions when available. */
void ReexpressSpatialVector(const RotationMatrix<double>& R_AB,
const Vector6<double>& V_B,
Vector6<double>* V_A);

} // namespace internal
} // namespace math
} // namespace drake
87 changes: 87 additions & 0 deletions math/fast_pose_composition_functions_avx2_fma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,62 @@ void ComposeXinvXImpl(const double* X_BA, const double* X_BC, double* X_AC) {
hn::StoreN(stu_, tag, X_AC + 9, 3); // 3-wide write to stay in bounds
}

/* Re-express SpatialVector via V_A = R_AB * V_B.
R_AB is 9 consecutive doubles in column-major order, the vectors are 6
consecutive elements comprising two independent 3-vectors. It is allowable for
V_A and V_B to be the same memory.
R_AB = abcdefghi
V_B = xyzrst
V_A = XYZRST
We want to perform two matrix-vector products:
V_A R_AB V_B
X a d g x
Y = b e h @ y 15 flops
Z c f i z
R a d g r
S = b e h @ s 15 flops
T c f i t
We can do this in 6 SIMD instructions. We end up doing 40 flops and throwing
10 of them away.
*/
void ReexpressSpatialVectorImpl(const double* R_AB,
const double* V_B,
double* V_A) {
const hn::FixedTag<double, 4> tag;

const auto abc_ = hn::LoadU(tag, R_AB); // (d is loaded but unused)
const auto def_ = hn::LoadU(tag, R_AB + 3); // (g is loaded but unused)
const auto ghi_ = hn::LoadN(tag, R_AB + 6, 3);

const auto xxx_ = hn::Set(tag, V_B[0]);
const auto yyy_ = hn::Set(tag, V_B[1]);
const auto zzz_ = hn::Set(tag, V_B[2]);

// Vector XYZ: X Y Z _
auto XYZ_ = hn::Mul(abc_, xxx_); // ax bx cx _
XYZ_ = hn::MulAdd(def_, yyy_, XYZ_); // +dy +ey +fy _
XYZ_ = hn::MulAdd(ghi_, zzz_, XYZ_); // +gz +hz +iz _

const auto rrr_ = hn::Set(tag, V_B[3]);
const auto sss_ = hn::Set(tag, V_B[4]);
const auto ttt_ = hn::Set(tag, V_B[5]);

// Vector RST: R S T _
auto RST_ = hn::Mul(abc_, rrr_); // ar br cr _
RST_ = hn::MulAdd(def_, sss_, RST_); // +ds +es +fs _
RST_ = hn::MulAdd(ghi_, ttt_, RST_); // +gt +ht +it _

hn::StoreU(XYZ_, tag, V_A); // 4-wide write temporarily overwrites R
hn::StoreN(RST_, tag, V_A + 3, 3); // 3-wide write to stay in bounds
}

#else // HWY_MAX_BYTES

/* The portable versions are always defined. They should be written to maximize
Expand Down Expand Up @@ -697,6 +753,20 @@ void ComposeXinvXImpl(const double* X_BA, const double* X_BC, double* X_AC) {
ComposeXinvXNoAlias(X_BA, X_BC, X_AC_temp);
std::copy(X_AC_temp, X_AC_temp + 12, X_AC);
}
void ReexpressSpatialVectorImpl(const double* R_AB,
const double* V_B,
double* V_A) {
DRAKE_ASSERT(V_A != nullptr);
double x, y, z; // Protect from overlap with V_B.
x = row_x_col(&R_AB[0], &V_B[0]);
y = row_x_col(&R_AB[1], &V_B[0]);
z = row_x_col(&R_AB[2], &V_B[0]);
V_A[0] = x; V_A[1] = y; V_A[2] = z;
x = row_x_col(&R_AB[0], &V_B[3]);
y = row_x_col(&R_AB[1], &V_B[3]);
z = row_x_col(&R_AB[2], &V_B[3]);
V_A[3] = x; V_A[4] = y; V_A[5] = z;
}

#endif // HWY_MAX_BYTES

Expand Down Expand Up @@ -732,6 +802,10 @@ HWY_EXPORT(ComposeXinvXImpl);
struct ChooseBestComposeXinvX {
auto operator()() { return HWY_DYNAMIC_POINTER(ComposeXinvXImpl); }
};
HWY_EXPORT(ReexpressSpatialVectorImpl);
struct ChooseBestReexpressSpatialVector {
auto operator()() { return HWY_DYNAMIC_POINTER(ReexpressSpatialVectorImpl); }
};

// These sugar functions convert C++ types into bare arrays.
const double* GetRawData(const RotationMatrix<double>& R) {
Expand All @@ -750,6 +824,12 @@ double* GetRawData(RigidTransform<double>* X) {
// the rotation matrix first followed by the translation.
return const_cast<double*>(X->rotation().matrix().data());
}
const double* GetRawData(const Vector6<double>& V) {
return V.data();
}
double* GetRawData(Vector6<double>* V) {
return V->data();
}

} // namespace

Expand Down Expand Up @@ -781,6 +861,13 @@ void ComposeXinvX(const RigidTransform<double>& X_BA,
GetRawData(X_BA), GetRawData(X_BC), GetRawData(X_AC));
}

void ReexpressSpatialVector(const RotationMatrix<double>& R_AB,
const Vector6<double>& V_B,
Vector6<double>* V_A) {
LateBoundFunction<ChooseBestReexpressSpatialVector>::Call(
GetRawData(R_AB), GetRawData(V_B), GetRawData(V_A));
}

} // namespace internal
} // namespace math
} // namespace drake
Expand Down
17 changes: 17 additions & 0 deletions math/test/fast_pose_composition_functions_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,23 @@ TEST_P(FastPoseCompositionFunctions, ComposeXX) {
EXPECT_TRUE(CompareMatrices(arg3->GetAsMatrix4(), expected));
}

GTEST_TEST(FastReexpressSpatialVector, ReexpressSpatialVector) {
constexpr double kTolerance = 8 * std::numeric_limits<double>::epsilon();
const RotationMatrixd R_AB(RollPitchYawd(1.0, 1.25, 1.5));
Vector6d V_B;
V_B << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0;
Vector6d V_A_expected;
V_A_expected.head<3>() = R_AB * V_B.head<3>();
V_A_expected.tail<3>() = R_AB * V_B.tail<3>();
Vector6d V_A;
ReexpressSpatialVector(R_AB, V_B, &V_A);
EXPECT_TRUE(CompareMatrices(V_A, V_A_expected, kTolerance));

// Ensure that it works when V_A and V_B are the same vector.
ReexpressSpatialVector(R_AB, V_B, &V_B);
EXPECT_TRUE(CompareMatrices(V_B, V_A_expected, kTolerance));
}

} // namespace
} // namespace internal
} // namespace math
Expand Down
10 changes: 9 additions & 1 deletion multibody/math/spatial_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,15 @@ class SpatialVector {
/// </pre>
friend SpatialQuantity operator*(
const math::RotationMatrix<T>& R_FE, const SpatialQuantity& V_E) {
return SpatialQuantity(R_FE * V_E.rotational(), R_FE * V_E.translational());
if constexpr (std::is_same_v<T, double>) {
SpatialQuantity V_F;
math::internal::ReexpressSpatialVector(R_FE, V_E.get_coeffs(),
&V_F.get_coeffs());
return V_F;
} else {
return SpatialQuantity(R_FE * V_E.rotational(),
R_FE * V_E.translational());
}
}

/// Factory to create a _zero_ spatial vector, i.e., a %SpatialVector whose
Expand Down
17 changes: 9 additions & 8 deletions multibody/tree/body_node_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ void BodyNodeImpl<T, ConcreteMobilizer>::CalcVelocityKinematicsCache2_BaseToTip(
const SpatialVelocity<T> V_WMc_Mp = V_WMp_Mp.Shift(p_MpM_Mp); // 15 flops

SpatialVelocity<T>& V_WM_M = vc2->get_mutable_V_WM_M(index);
V_WM_M = R_MMp * V_WMc_Mp + V_FM_M; // 36 flops
V_WM_M = R_MMp * V_WMc_Mp // 6 SIMD flops
+ V_FM_M; // 6 flops
}

// As a guideline for developers, a summary of the computations performed in
Expand Down Expand Up @@ -439,7 +440,7 @@ void BodyNodeImpl<T, ConcreteMobilizer>::CalcSpatialAcceleration2_BaseToTip(
const math::RotationMatrix<T> R_MMp = X_MpM.rotation().inverse();
const Vector3<T>& w_WP_Mp = V_WMp_Mp.rotational(); // all frames on P same ω

// Start with parent contribution. 42 flops for shift + 30 for re-express
// Start with parent contribution. shift: 42 flops, reexpress: 6 SIMD flops
A_WM_M = R_MMp * A_WMp_Mp.Shift(p_MpM_Mp, w_WP_Mp); // parent contribution

// Next add in the velocity-dependent bias acceleration:
Expand Down Expand Up @@ -672,17 +673,17 @@ void BodyNodeImpl<T, ConcreteMobilizer>::CalcInverseDynamics2_TipToBase(
// Unfortunately the applied force is given at Bo and expressed in W. We
// need it applied at Mo and expressed in M.
const SpatialForce<T>& Fapp_BBo_W = Fapplied_Bo_W_array[index];
const SpatialForce<T> Fapp_BBo_M = R_MW * Fapp_BBo_W; // 30 flops
const SpatialForce<T> Fapp_BBo_M = R_MW * Fapp_BBo_W; // 6 SIMD flops
const SpatialForce<T> Fapp_BMo_M = Fapp_BBo_M.Shift(-p_MoBo_M); // 15 flops

// Calculate the velocity-dependent bias force on B (57 flops).
// Calculate the velocity-dependent bias force on B (48 flops).
const SpatialForce<T> Fbias_BMo_M(
mass * w_WM_M.cross(G_BMo_M * w_WM_M), // bias moment, 30 flops
mass * w_WM_M.cross(w_WM_M.cross(p_MoBcm_M))); // bias force, 27 flops
mass * w_WM_M.cross(G_BMo_M * w_WM_M), // bias moment, 27 flops
mass * w_WM_M.cross(w_WM_M.cross(p_MoBcm_M))); // bias force, 21 flops

// F_BMo_M is an output and will be the total force on B.
SpatialForce<T>& F_BMo_M = (*F_BMo_M_array)[index];
F_BMo_M = M_BMo_M * A_WM_M + Fbias_BMo_M - Fapp_BMo_M; // 78 flops
F_BMo_M = M_BMo_M * A_WM_M + Fbias_BMo_M - Fapp_BMo_M; // 57 flops

// Next, account for forces applied to B through its outboard joints, treated
// as though they were locked. Since we're going tip to base we already have
Expand All @@ -696,7 +697,7 @@ void BodyNodeImpl<T, ConcreteMobilizer>::CalcInverseDynamics2_TipToBase(
const Vector3<T>& p_MoMc_M = X_MMc.translation();

const SpatialForce<T>& F_CMc_Mc = (*F_BMo_M_array)[outboard_index];
const SpatialForce<T> F_CMc_M = R_MMc * F_CMc_Mc; // 30 flops
const SpatialForce<T> F_CMc_M = R_MMc * F_CMc_Mc; // 6 SIMD flops
const SpatialForce<T> F_CMo_M = F_CMc_M.Shift(-p_MoMc_M); // 15 flops
F_BMo_M += F_CMo_M; // 6 flops
}
Expand Down
8 changes: 4 additions & 4 deletions multibody/tree/spatial_inertia.cc
Original file line number Diff line number Diff line change
Expand Up @@ -448,10 +448,10 @@ SpatialForce<T> SpatialInertia<T>::operator*(
return SpatialForce<T>(
// Note: p_PScm_E here is p_BoBcm in the above notation.
// Rotational
mass_ * (G_SP_E_ * alpha_WB_E + p_PScm_E_.cross(a_WBo_E)),
mass_ * (G_SP_E_ * alpha_WB_E + p_PScm_E_.cross(a_WBo_E)), // 30 flops
// Translational: notice the order of the cross product is the reversed
// of the documentation above and thus no minus sign is needed.
mass_ * (alpha_WB_E.cross(p_PScm_E_) + a_WBo_E));
mass_ * (alpha_WB_E.cross(p_PScm_E_) + a_WBo_E)); // 15 flops
}

template <typename T>
Expand All @@ -466,10 +466,10 @@ SpatialMomentum<T> SpatialInertia<T>::operator*(
return SpatialMomentum<T>(
// Note: p_PScm_E here is p_BoBcm in the above notation.
// Rotational
mass_ * (G_SP_E_ * w_WB_E + p_PScm_E_.cross(v_WP_E)),
mass_ * (G_SP_E_ * w_WB_E + p_PScm_E_.cross(v_WP_E)), // 30 flops
// Translational: notice the order of the cross product is the reversed
// of the documentation above and thus no minus sign is needed.
mass_ * (w_WB_E.cross(p_PScm_E_) + v_WP_E));
mass_ * (w_WB_E.cross(p_PScm_E_) + v_WP_E)); // 15 flops
}

template <typename T>
Expand Down

0 comments on commit 5adfeef

Please sign in to comment.