Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(autoware_map_based_prediction autoware_universe_utils): improve performance of map_based_prediction #1464

Merged
73 changes: 73 additions & 0 deletions common/autoware_universe_utils/examples/example_lru_cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2024 TIER IV, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "autoware/universe_utils/system/lru_cache.hpp"

#include <chrono>
#include <cstdint>
#include <iostream>

using autoware::universe_utils::LRUCache;

// Fibonacci calculation with LRU cache
int64_t fibonacci_with_cache(int n, LRUCache<int, int64_t> * cache)
{
if (n <= 1) return n;

if (cache->contains(n)) {
return *cache->get(n);
}
int64_t result = fibonacci_with_cache(n - 1, cache) + fibonacci_with_cache(n - 2, cache);
cache->put(n, result);
return result;
}

// Fibonacci calculation without cache
int64_t fibonacci_no_cache(int n)
{
if (n <= 1) return n;
return fibonacci_no_cache(n - 1) + fibonacci_no_cache(n - 2);
}

// Helper function to measure execution time
template <typename Func, typename... Args>
int64_t measure_time(Func func, Args &&... args)
{
auto start = std::chrono::high_resolution_clock::now();
func(std::forward<Args>(args)...);
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
}

int main()
{
const int max_n = 40; // Increased for more noticeable time difference
LRUCache<int, int64_t> cache(max_n); // Cache size set to MAX_N

std::cout << "Comparing Fibonacci calculation time with and without LRU cache:\n\n";
std::cout << "n\tWith Cache (μs)\tWithout Cache (μs)\n";
std::cout << std::string(43, '-') << "\n";

for (int i = 30; i <= max_n; ++i) { // Starting from 30 for more significant differences
int64_t time_with_cache = measure_time([i, &cache]() { fibonacci_with_cache(i, &cache); });
int64_t time_without_cache = measure_time([i]() { fibonacci_no_cache(i); });

std::cout << i << "\t" << time_with_cache << "\t\t" << time_without_cache << "\n";

// Clear the cache after each iteration to ensure fair comparison
cache.clear();
}

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright 2024 TIER IV, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef AUTOWARE__UNIVERSE_UTILS__SYSTEM__LRU_CACHE_HPP_
#define AUTOWARE__UNIVERSE_UTILS__SYSTEM__LRU_CACHE_HPP_

#include <cstddef>
#include <list>
#include <optional>
#include <unordered_map>
#include <utility>

namespace autoware::universe_utils
{

/**
* @brief A template class for LRU (Least Recently Used) Cache.
*
* This class implements a simple LRU cache using a combination of a list and a hash map.
*
* @tparam Key The type of keys.
* @tparam Value The type of values.
* @tparam Map The type of underlying map, defaulted to std::unordered_map.
*/
template <typename Key, typename Value, template <typename...> class Map = std::unordered_map>
class LRUCache
{
private:
size_t capacity_; ///< The maximum capacity of the cache.
std::list<std::pair<Key, Value>> cache_list_; ///< List to maintain the order of elements.
Map<Key, typename std::list<std::pair<Key, Value>>::iterator>
cache_map_; ///< Map for fast access to elements.

public:
/**
* @brief Construct a new LRUCache object.
*
* @param size The capacity of the cache.
*/
explicit LRUCache(size_t size) : capacity_(size) {}

/**
* @brief Get the capacity of the cache.
*
* @return The capacity of the cache.
*/
[[nodiscard]] size_t capacity() const { return capacity_; }

/**
* @brief Insert a key-value pair into the cache.
*
* If the key already exists, its value is updated and it is moved to the front.
* If the cache exceeds its capacity, the least recently used element is removed.
*
* @param key The key to insert.
* @param value The value to insert.
*/
void put(const Key & key, const Value & value)
{
auto it = cache_map_.find(key);
if (it != cache_map_.end()) {
cache_list_.erase(it->second);
}
cache_list_.push_front({key, value});
cache_map_[key] = cache_list_.begin();

if (cache_map_.size() > capacity_) {
auto last = cache_list_.back();
cache_map_.erase(last.first);
cache_list_.pop_back();
}
}

/**
* @brief Retrieve a value from the cache.
*
* If the key does not exist in the cache, std::nullopt is returned.
* If the key exists, the value is returned and the element is moved to the front.
*
* @param key The key to retrieve.
* @return The value associated with the key, or std::nullopt if the key does not exist.
*/
std::optional<Value> get(const Key & key)
{
auto it = cache_map_.find(key);
if (it == cache_map_.end()) {
return std::nullopt;
}
cache_list_.splice(cache_list_.begin(), cache_list_, it->second);
return it->second->second;
}

/**
* @brief Clear the cache.
*
* This removes all elements from the cache.
*/
void clear()
{
cache_list_.clear();
cache_map_.clear();
}

/**
* @brief Get the current size of the cache.
*
* @return The number of elements in the cache.
*/
[[nodiscard]] size_t size() const { return cache_map_.size(); }

/**
* @brief Check if the cache is empty.
*
* @return True if the cache is empty, false otherwise.
*/
[[nodiscard]] bool empty() const { return cache_map_.empty(); }

/**
* @brief Check if a key exists in the cache.
*
* @param key The key to check.
* @return True if the key exists, false otherwise.
*/
[[nodiscard]] bool contains(const Key & key) const
{
return cache_map_.find(key) != cache_map_.end();
}
};

} // namespace autoware::universe_utils

#endif // AUTOWARE__UNIVERSE_UTILS__SYSTEM__LRU_CACHE_HPP_
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2024 TIER IV, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "autoware/universe_utils/system/lru_cache.hpp"

#include <gtest/gtest.h>

#include <chrono>
#include <cstdint>
#include <iostream>
#include <utility>

using autoware::universe_utils::LRUCache;

// Fibonacci calculation with LRU cache
int64_t fibonacci_with_cache(int n, LRUCache<int, int64_t> * cache)
{
if (n <= 1) return n;

if (cache->contains(n)) {
return *cache->get(n);
}
int64_t result = fibonacci_with_cache(n - 1, cache) + fibonacci_with_cache(n - 2, cache);
cache->put(n, result);
return result;
}

// Fibonacci calculation without cache
int64_t fibonacci_no_cache(int n)
{
if (n <= 1) return n;
return fibonacci_no_cache(n - 1) + fibonacci_no_cache(n - 2);
}

// Helper function to measure execution time
template <typename Func, typename... Args>
std::pair<int64_t, decltype(std::declval<Func>()(std::declval<Args>()...))> measure_time(
Func func, Args &&... args)
{
auto start = std::chrono::high_resolution_clock::now();
auto result = func(std::forward<Args>(args)...);
auto end = std::chrono::high_resolution_clock::now();
return {std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(), result};
}

// Test case to verify Fibonacci calculation results with and without cache
TEST(FibonacciLRUCacheTest, CompareResultsAndPerformance)
{
const int max_n = 40; // Test range
LRUCache<int, int64_t> cache(max_n); // Cache with capacity set to max_n

for (int i = 0; i <= max_n; ++i) {
// Measure time for performance comparison
auto [time_with_cache, result_with_cache] =
measure_time([i, &cache]() { return fibonacci_with_cache(i, &cache); });
auto [time_without_cache, result_without_cache] =
measure_time([i]() { return fibonacci_no_cache(i); });

EXPECT_EQ(result_with_cache, result_without_cache) << "Mismatch at n = " << i;

// Print the calculation time
std::cout << "n = " << i << ": With Cache = " << time_with_cache
<< " μs, Without Cache = " << time_without_cache << " μs\n";

// // Clear the cache after each iteration to ensure fair comparison
cache.clear();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <autoware/universe_utils/ros/published_time_publisher.hpp>
#include <autoware/universe_utils/ros/transform_listener.hpp>
#include <autoware/universe_utils/ros/uuid_helper.hpp>
#include <autoware/universe_utils/system/lru_cache.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
#include <autoware/universe_utils/system/time_keeper.hpp>
#include <rclcpp/rclcpp.hpp>
Expand All @@ -39,7 +40,9 @@
#include <visualization_msgs/msg/marker_array.hpp>

#include <lanelet2_core/Forward.h>
#include <lanelet2_core/LaneletMap.h>
#include <lanelet2_routing/Forward.h>
#include <lanelet2_routing/LaneletPath.h>
#include <lanelet2_traffic_rules/TrafficRules.h>

#include <algorithm>
Expand All @@ -53,6 +56,27 @@
#include <utility>
#include <vector>

namespace std
{
template <>
struct hash<lanelet::routing::LaneletPaths>
{
// 0x9e3779b9 is a magic number. See
// https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine
size_t operator()(const lanelet::routing::LaneletPaths & paths) const
{
size_t seed = 0;
for (const auto & path : paths) {
for (const auto & lanelet : path) {
seed ^= hash<int64_t>{}(lanelet.id()) + 0x9e3779b9 + (seed << 6U) + (seed >> 2U);
}
// Add a separator between paths
seed ^= hash<int>{}(0) + 0x9e3779b9 + (seed << 6U) + (seed >> 2U);
}
return seed;
}
};
} // namespace std
namespace autoware::map_based_prediction
{
struct LateralKinematicsToLanelet
Expand Down Expand Up @@ -178,6 +202,9 @@ class MapBasedPredictionNode : public rclcpp::Node
// Crosswalk Entry Points
lanelet::ConstLanelets crosswalks_;

// Fences
lanelet::LaneletMapUPtr fence_layer_{nullptr};

// Parameters
bool enable_delay_compensation_;
PredictionTimeHorizon prediction_time_horizon_;
Expand Down Expand Up @@ -287,7 +314,10 @@ class MapBasedPredictionNode : public rclcpp::Node
const float path_probability, const ManeuverProbability & maneuver_probability,
const Maneuver & maneuver, std::vector<PredictedRefPath> & reference_paths,
const double speed_limit = 0.0);
std::vector<PosePath> convertPathType(const lanelet::routing::LaneletPaths & paths);

mutable universe_utils::LRUCache<lanelet::routing::LaneletPaths, std::vector<PosePath>>
lru_cache_of_convert_path_type_{1000};
std::vector<PosePath> convertPathType(const lanelet::routing::LaneletPaths & paths) const;

void updateFuturePossibleLanelets(
const TrackedObject & object, const lanelet::routing::LaneletPaths & paths);
Expand Down
Loading
Loading