diff --git a/dynolog/src/KernelCollectorBase.cpp b/dynolog/src/KernelCollectorBase.cpp
index ef02c3cb..b925da60 100644
--- a/dynolog/src/KernelCollectorBase.cpp
+++ b/dynolog/src/KernelCollectorBase.cpp
@@ -12,6 +12,7 @@
 #include <chrono>
 #include <sstream>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 DEFINE_bool(
@@ -110,7 +111,7 @@ void KernelCollectorBase::readCpuStats() {
 void KernelCollectorBase::readNetworkStats() {
   auto devices = pfs_.get_net().get_dev();
 
-  std::map<std::string, struct RxTx> rxtxNew_;
+  std::unordered_map<std::string, struct RxTx> rxtxNew_;
 
   size_t nicDevCount = 0;
   for (const auto& device : devices) {
@@ -168,7 +169,7 @@ bool KernelCollectorBase::isMonitoringInterfaceActive(std::string interface) {
 }
 
 void KernelCollectorBase::updateNetworkStatsDelta(
-    const std::map<std::string, struct RxTx>& rxtxNew) {
+    const std::unordered_map<std::string, struct RxTx>& rxtxNew) {
   rxtxDelta_.clear();
   for (const auto& [devName, devRxtxNew] : rxtxNew) {
     if (rxtx_.find(devName) == rxtx_.end()) {
diff --git a/dynolog/src/KernelCollectorBase.h b/dynolog/src/KernelCollectorBase.h
index 2485ceb7..6ee405d7 100644
--- a/dynolog/src/KernelCollectorBase.h
+++ b/dynolog/src/KernelCollectorBase.h
@@ -7,7 +7,7 @@
 
 #include <time.h>
 #include <array>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include "dynolog/src/Types.h"
 #include "pfs/procfs.hpp"
@@ -47,10 +47,10 @@ class KernelCollectorBase {
   std::vector<CpuTime> perCoreCpuTime_;
 
   // Save more recent net device stats
-  std::map<std::string, struct RxTx> rxtx_, rxtxDelta_;
+  std::unordered_map<std::string, struct RxTx> rxtx_, rxtxDelta_;
 
   void updateNetworkStatsDelta(
-      const std::map<std::string, struct RxTx>& rxtxNew);
+      const std::unordered_map<std::string, struct RxTx>& rxtxNew);
   bool isMonitoringInterfaceActive(std::string interface);
 
   // Should match googletest/include/gtest/gtest_prod.h
diff --git a/dynolog/src/LibkinetoConfigManager.h b/dynolog/src/LibkinetoConfigManager.h
index 78d34088..5998004d 100644
--- a/dynolog/src/LibkinetoConfigManager.h
+++ b/dynolog/src/LibkinetoConfigManager.h
@@ -15,6 +15,7 @@
 #include <set>
 #include <string>
 #include <thread>
+#include <unordered_map>
 #include <vector>
 #include "dynolog/src/LibkinetoTypes.h"
 
@@ -69,12 +70,12 @@ class LibkinetoConfigManager {
 
   // Map of pid ancestry -> LibkinetoProcess
   using ProcessMap = std::map<std::set<int32_t>, LibkinetoProcess>;
-  std::map<std::string, ProcessMap> jobs_;
+  std::unordered_map<std::string, ProcessMap> jobs_;
 
   // Map of gpu id -> pids
-  using InstancesPerGpuMap = std::map<int32_t, std::set<int32_t>>;
+  using InstancesPerGpuMap = std::unordered_map<int32_t, std::set<int32_t>>;
   // Job id -> InstancesPerGpu
-  std::map<std::string, InstancesPerGpuMap> jobInstancesPerGpu_;
+  std::unordered_map<std::string, InstancesPerGpuMap> jobInstancesPerGpu_;
   mutable std::mutex mutex_;
 
   void setOnDemandConfigForProcess(
diff --git a/dynolog/src/Metrics.cpp b/dynolog/src/Metrics.cpp
index 82b8d7e5..8cd7f3af 100644
--- a/dynolog/src/Metrics.cpp
+++ b/dynolog/src/Metrics.cpp
@@ -6,7 +6,7 @@
 #include "dynolog/src/Metrics.h"
 
 #include <fmt/format.h>
-#include <map>
+#include <unordered_map>
 
 namespace dynolog {
 
@@ -34,7 +34,7 @@ const std::vector<MetricDesc> getAllMetrics() {
        .type = MetricType::Instant,
        .desc = "How long the system has been running in seconds."},
   };
-  static std::map<std::string, std::string> cpustats = {
+  static std::unordered_map<std::string, std::string> cpustats = {
       {"cpu_u_ms", "user"},
       {"cpu_s_ms", "system"},
       {"cpu_n_ms", "nice"},
diff --git a/dynolog/src/PerfMonitor.h b/dynolog/src/PerfMonitor.h
index 148af7c4..852aae70 100644
--- a/dynolog/src/PerfMonitor.h
+++ b/dynolog/src/PerfMonitor.h
@@ -9,6 +9,8 @@
 #include "hbt/src/mon/Monitor.h"
 #include "hbt/src/perf_event/BuiltinMetrics.h"
 
+#include <unordered_map>
+
 namespace hbt = facebook::hbt;
 
 namespace dynolog {
@@ -39,8 +41,9 @@ class PerfMonitor {
   const hbt::CpuSet& monCpus_;
   std::shared_ptr<hbt::perf_event::PmuDeviceManager> pmuDeviceManager_;
   const MuxGroupId defaultMuxGroupId_;
-  std::map<ElemId, std::optional<TCountReader::ReadValues>> readValues_;
-  std::map<ElemId, std::shared_ptr<TCountReader>> countReaders_;
+  std::unordered_map<ElemId, std::optional<TCountReader::ReadValues>>
+      readValues_;
+  std::unordered_map<ElemId, std::shared_ptr<TCountReader>> countReaders_;
 };
 
 // singleton object for default Metrics and PmuDeviceManager
diff --git a/dynolog/src/metric_frame/MetricFrame.h b/dynolog/src/metric_frame/MetricFrame.h
index 87ba02a0..ec51424a 100644
--- a/dynolog/src/metric_frame/MetricFrame.h
+++ b/dynolog/src/metric_frame/MetricFrame.h
@@ -10,8 +10,8 @@
 #include "dynolog/src/metric_frame/MetricSeries.h"
 
 #include <cmath>
-#include <map>
 #include <memory>
+#include <unordered_map>
 #include <variant>
 #include <vector>
 
@@ -38,7 +38,7 @@ class MetricFrameMap : public MetricFrameBase {
   void show(std::ostream& s) const override;
 
  protected:
-  std::map<std::string, MetricSeriesVar> series_;
+  std::unordered_map<std::string, MetricSeriesVar> series_;
 };
 
 using VectorSeriesDefList = std::vector<MetricSeriesVar>;
diff --git a/dynolog/tests/KernelCollecterTest.cpp b/dynolog/tests/KernelCollecterTest.cpp
index 4c032e92..0ab0feaa 100644
--- a/dynolog/tests/KernelCollecterTest.cpp
+++ b/dynolog/tests/KernelCollecterTest.cpp
@@ -109,8 +109,8 @@ TEST(KernelCollecterTest, NetworkStatsTest) {
 }
 
 TEST(KernelCollecterTest, UpdateNetworkStatsDeltaTest) {
-  std::map<std::string, struct RxTx> oneDevice;
-  std::map<std::string, struct RxTx> twoDevices;
+  std::unordered_map<std::string, struct RxTx> oneDevice;
+  std::unordered_map<std::string, struct RxTx> twoDevices;
 
   KernelCollectorBase kb{get_test_root()};
 
diff --git a/hbt/src/mon/Monitor.h b/hbt/src/mon/Monitor.h
index dee05d45..6201592b 100644
--- a/hbt/src/mon/Monitor.h
+++ b/hbt/src/mon/Monitor.h
@@ -14,6 +14,7 @@
 
 #include <pfs/procfs.hpp>
 #include <mutex>
+#include <unordered_map>
 
 #ifdef HBT_ENABLE_TRACING
 #include "hbt/src/mon/TraceMonitor.h"
@@ -200,13 +201,13 @@ class Monitor {
   /// Read counts for all events opened in sampling mode
   /// in all TraceCollectors.
   auto readSamplingCounts() const {
-    using TraceCollectorReadValues = std::map<
+    using TraceCollectorReadValues = std::unordered_map<
         std::string,
         std::optional<TraceCollector::TPerCpuCountSampleGenerator::ReadValues>>;
 
     std::lock_guard<std::mutex> lock{mutex_};
 
-    std::map<ElemId, TraceCollectorReadValues> rvs;
+    std::unordered_map<ElemId, TraceCollectorReadValues> rvs;
 
     for (auto& [k, tm] : trace_monitors_) {
       HBT_THROW_ASSERT_IF(tm == nullptr);
@@ -220,10 +221,10 @@ class Monitor {
 
   /// Read counts for all events opened in counting mode
   /// in all PerCpuCountReaders.
-  std::map<ElemId, std::optional<TCountReader::ReadValues>> readAllCounts()
-      const {
+  std::unordered_map<ElemId, std::optional<TCountReader::ReadValues>>
+  readAllCounts() const {
     std::lock_guard<std::mutex> lock{mutex_};
-    std::map<ElemId, std::optional<TCountReader::ReadValues>> rvs;
+    std::unordered_map<ElemId, std::optional<TCountReader::ReadValues>> rvs;
 
     for (auto& [k, cr] : count_readers_) {
       HBT_THROW_ASSERT_IF(cr == nullptr);
@@ -234,10 +235,15 @@ class Monitor {
 
   /// Read counts for all events opened in counting mode
   /// in all PerCpuCountReaders.
-  std::map<ElemId, std::optional<std::vector<TCountReader::ReadValues>>>
+  std::unordered_map<
+      ElemId,
+      std::optional<std::vector<TCountReader::ReadValues>>>
   readAllCountsPerCpu() const {
     std::lock_guard<std::mutex> lock{mutex_};
-    std::map<ElemId, std::optional<std::vector<TCountReader::ReadValues>>> rvs;
+    std::unordered_map<
+        ElemId,
+        std::optional<std::vector<TCountReader::ReadValues>>>
+        rvs;
 
     for (auto& [k, cr] : count_readers_) {
       HBT_THROW_ASSERT_IF(cr == nullptr);
@@ -343,10 +349,11 @@ class Monitor {
 #ifdef HBT_ENABLE_BPERF
   /// Read counts for all events opened in counting mode
   /// in all BPerfCountReaders.
-  std::map<ElemId, std::optional<TBPerfCountReader::ReadValues>>
+  std::unordered_map<ElemId, std::optional<TBPerfCountReader::ReadValues>>
   readAllBPerfCounts(bool skip_offset = false) const {
     std::lock_guard<std::mutex> lock{mutex_};
-    std::map<ElemId, std::optional<TBPerfCountReader::ReadValues>> rvs;
+    std::unordered_map<ElemId, std::optional<TBPerfCountReader::ReadValues>>
+        rvs;
 
     for (auto& [k, cr] : bperf_count_readers_) {
       HBT_THROW_ASSERT_IF(cr == nullptr);
diff --git a/hbt/src/perf_event/BuiltinMetrics.cpp b/hbt/src/perf_event/BuiltinMetrics.cpp
index edd7f248..f2c8e585 100644
--- a/hbt/src/perf_event/BuiltinMetrics.cpp
+++ b/hbt/src/perf_event/BuiltinMetrics.cpp
@@ -18,8 +18,8 @@
 #include "hbt/src/perf_event/json_events/generated/intel/JsonEvents.h"
 #endif // USE_JSON_GENERATED_PERF_EVENTS
 
-#include <map>
 #include <memory>
+#include <unordered_map>
 
 namespace facebook::hbt::perf_event {
 
@@ -522,7 +522,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "instructions",
       "Number of CPU instructions retired since the counter is enabled.",
       "Number of CPU instructions retired since the counter is enabled.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "instructions",
@@ -539,7 +539,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "cycles",
       "Number of CPU clock cycles since the counter is enabled.",
       "Number of CPU clock cycles since the counter is enabled.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "cycles",
@@ -555,7 +555,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "instructions_per_cycle",
       "Average number of instructions executed each clock cycle.",
       "Average number of instructions executed each clock cycle.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -583,7 +583,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
         "Core-originated cacheable demand requests missed L2",
         "Counts core-originated cacheable requests that miss the L2 cache. "
         "Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. ",
-        std::map<TOptCpuArch, EventRefs>{
+        std::unordered_map<TOptCpuArch, EventRefs>{
             {std::nullopt,
              EventRefs{EventRef{
                  "l2_cache_misses",
@@ -600,7 +600,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
         "Core-originated cacheable demand requests missed L2",
         "Counts core-originated cacheable requests that miss the L2 cache. "
         "Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L2. ",
-        std::map<TOptCpuArch, EventRefs>{
+        std::unordered_map<TOptCpuArch, EventRefs>{
             {std::nullopt,
              EventRefs{
                  EventRef{
@@ -634,7 +634,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. "
       "It does not include all misses to the L3."
       "Also count number of instructions in the same period to calculate l3 misses per instruction.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -659,7 +659,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "dram_access_reads",
       "Memory bandwidth used for read events.",
       "Memory bandwidth used for read events. The value is inferred from Intel offcore counters.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::BDW,
            EventRefs{EventRef{
                "dram_access_reads",
@@ -714,7 +714,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       " executed by AVX vector instruction set."
       "Each instruction can be converted to operations by multipying the count"
       " with 1, 4, 8, 16 respectively.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -752,7 +752,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       " executed by AVX vector instruction set."
       "Each instruction can be converted to operations by multipying the count"
       " with 1, 2, 4, 8 respectively.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -787,7 +787,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "cpu_clock",
       "High-resolution sys and user CPU clock",
       "High-resolution sys and user CPU clock",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -810,7 +810,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "generic_sw",
       "All generic software events every context switch",
       "All generic software events. They are never multiplexed.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -863,7 +863,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "page_faults",
       "Software Page faults",
       "Major and minor page faults",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -893,7 +893,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "system_calls",
       "System calls Tracepoint",
       "System calls Tracepoint Event",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -917,7 +917,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "dqos",
       "System-derived estimation of Dyno QoS",
       "IPC and Scheduler stats. Requires root. Make sure /proc/sys/kernel/sched_schedstats is set.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -1002,7 +1002,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "ipc",
       "IPC including user, kernel, and hypervisor.",
       "Intructions-per-Cycle (IPC) including user, kernel, and hypervisor. ",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -1025,7 +1025,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "cs_ipc",
       "Context switch-based IPC including user, kernel, and hypervisor.",
       "Context switch-based Intructions-per-Cycle (IPC) including user, kernel, and hypervisor. ",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -1054,7 +1054,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "cycles_breakdown",
       "Cycles in user, kernel and idle.",
       "Time (ref-cycles) and cycles spent in user (ring 3) or kernel (ring 0)",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{
                EventRef{
@@ -1084,7 +1084,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "topdown_l4_mem",
       "External memory (DRAM) bandwidth and latency",
       "External memory (DRAM) bandwidth and latency.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::BDX,
            EventRefs{
                EventRef{
@@ -1122,7 +1122,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "topdown_l3_icache",
       "Fraction of cycles the CPU was stalled due to instruction cache misses",
       "Fraction of cycles the CPU was stalled due to instruction cache misses.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::BDX,
            EventRefs{
                EventRef{
@@ -1146,7 +1146,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "topdown_l3_L1_bound",
       "Fraction of cycles the CPU was stalled due to L1 data cache misses",
       "Fraction of cycles the CPU was stalled due to L1 data cache misses.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::BDX,
            EventRefs{
                EventRef{
@@ -1176,7 +1176,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "topdown_l3_L2_bound",
       "Estimates how often the CPU was stalled due to L2 cache accesses by loads.",
       "Estimates how often the CPU was stalled due to L2 cache accesses by loads.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::BDX,
            EventRefs{
                EventRef{
@@ -1220,7 +1220,7 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
       "Provides how many CPU is halted, how much is spent in kernel and user space "
       " and a breakdown of how many pipeline slots are wasted due "
       "to each hardware bottleneck.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {CpuArch::SKX,
            EventRefs{
                // Note: There are two events for CPU_CLK_UNHALTED:
@@ -1333,7 +1333,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_ICACHE_MISSES",
       "L2 code requests",
       "Counts the total number of L2 code requests.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "icache_misses",
@@ -1350,7 +1350,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_ICACHE_MISSES_PERF",
       "Level 1 instruction cache load operation misses",
       "Level 1 instruction cache load operation misses",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "icache_misses_perf",
@@ -1366,7 +1366,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_DCACHE_MISSES",
       "Counts the number of cache lines replaced in L1 data cache.",
       "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "dcache_misses",
@@ -1382,7 +1382,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_ITLB_MISSES",
       "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
       "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "itlb_misses",
@@ -1398,7 +1398,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_L2_MISSES",
       "L2 cache lines filling L2",
       "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "core_l2_misses",
@@ -1414,7 +1414,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_LLC_MISSES",
       "Core-originated cacheable requests that missed L3  (Except hardware prefetches to the L3)",
       "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2.  It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "llc_misses",
@@ -1430,7 +1430,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_BRANCH_MISSES",
       "All mispredicted branch instructions retired.",
       "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "branch_misses",
@@ -1446,7 +1446,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_BRANCH_INSTRUCTIONS",
       "All branch instructions retired.",
       "Counts all branch instructions retired.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "branch_misses",
@@ -1462,7 +1462,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_L2_PREFETCH_HITS",
       "SW prefetch requests that hit L2 cache.",
       "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "prefetch_hits",
@@ -1478,7 +1478,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_L2_PREFETCH_MISSES",
       "SW prefetch requests that miss L2 cache.",
       "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "prefetch_misses",
@@ -1494,7 +1494,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_DP_SCALAR",
       "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_dp_scalar",
@@ -1510,7 +1510,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_SP_SCALAR",
       "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_sp_scalar",
@@ -1526,7 +1526,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_DP_SSE",
       "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_dp_sse",
@@ -1542,7 +1542,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_SP_SSE",
       "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_sp_sse",
@@ -1558,7 +1558,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_DP_AVX",
       "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_dp_avx",
@@ -1574,7 +1574,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_SP_AVX",
       "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_sp_avx",
@@ -1590,7 +1590,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_DP_AVX2",
       "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_dp_avx2",
@@ -1606,7 +1606,7 @@ void addCoreMetrics(std::shared_ptr<Metrics>& metrics) {
       "HW_CORE_FLOPS_SP_AVX2",
       "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
       "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
-      std::map<TOptCpuArch, EventRefs>{
+      std::unordered_map<TOptCpuArch, EventRefs>{
           {std::nullopt,
            EventRefs{EventRef{
                "flops_sp_avx2",
diff --git a/hbt/src/perf_event/Metrics.h b/hbt/src/perf_event/Metrics.h
index 0cb2aef5..d2bf9512 100644
--- a/hbt/src/perf_event/Metrics.h
+++ b/hbt/src/perf_event/Metrics.h
@@ -9,8 +9,8 @@
 #include "hbt/src/perf_event/PmuDevices.h"
 #include "hbt/src/perf_event/PmuEvent.h"
 
-#include <map>
 #include <memory>
+#include <unordered_map>
 #include <variant>
 
 namespace facebook::hbt::perf_event {
@@ -46,7 +46,7 @@ struct MetricDesc {
   MetricId id;
   std::string brief_desc;
   std::string full_desc;
-  std::map<TOptCpuArch, EventRefs> event_refs_by_arch;
+  std::unordered_map<TOptCpuArch, EventRefs> event_refs_by_arch;
   uint64_t default_sampling_period;
   System::Permissions req_permissions;
   std::vector<std::string> dives;
@@ -56,7 +56,7 @@ struct MetricDesc {
       MetricId id,
       const std::string& brief_desc,
       const std::string& full_desc,
-      const std::map<TOptCpuArch, EventRefs>& event_refs_by_arch,
+      const std::unordered_map<TOptCpuArch, EventRefs>& event_refs_by_arch,
       uint64_t default_sampling_period,
       const System::Permissions& req_permissions,
       const std::vector<std::string>& dives,
@@ -223,7 +223,8 @@ class Metrics {
   }
 
  protected:
-  std::map<std::string, std::shared_ptr<const MetricDesc>> metric_descs_;
+  std::unordered_map<std::string, std::shared_ptr<const MetricDesc>>
+      metric_descs_;
 };
 
 } // namespace facebook::hbt::perf_event
diff --git a/hbt/src/perf_event/PmuDevices.h b/hbt/src/perf_event/PmuDevices.h
index 57873938..3ac480af 100644
--- a/hbt/src/perf_event/PmuDevices.h
+++ b/hbt/src/perf_event/PmuDevices.h
@@ -12,6 +12,7 @@
 #include <map>
 #include <memory>
 #include <numeric>
+#include <unordered_map>
 #include <variant>
 #include <vector>
 
@@ -59,7 +60,7 @@ struct LibPfm4EventGroup {
   }
 };
 
-using LibPfm4EventGroups = std::map<std::string, LibPfm4EventGroup>;
+using LibPfm4EventGroups = std::unordered_map<std::string, LibPfm4EventGroup>;
 
 /// An instance representing a system's PMU (a Performance Monitoring Unit).
 /// It can be statically enumerated PMU or a dynamic one.
@@ -136,8 +137,8 @@ class PmuDevice {
     uint8_t len;
   };
 
-  using SysFsDeviceCaps = std::map<std::string, std::string>;
-  using SysFsDeviceFormat = std::map<std::string, FormatAttr>;
+  using SysFsDeviceCaps = std::unordered_map<std::string, std::string>;
+  using SysFsDeviceFormat = std::unordered_map<std::string, FormatAttr>;
 
   // Entries in format subfolder
   // (/sys/devices/<pmu_name>/format).
@@ -256,8 +257,8 @@ class PmuDevice {
   bool in_sysfs_;
 
   // Alias as key, original event ID as value.
-  std::map<EventId, std::shared_ptr<EventDef>> event_defs_;
-  std::map<EventId, EventId> aliases_;
+  std::unordered_map<EventId, std::shared_ptr<EventDef>> event_defs_;
+  std::unordered_map<EventId, EventId> aliases_;
 
   // PMUs that are not per-core can be opened for any
   // CPU within a CPU group. In uncore PMUs, this is
@@ -273,7 +274,7 @@ class PmuDevice {
   }
 };
 
-using PerCpuEventConfs = std::map<CpuId, EventConfs>;
+using PerCpuEventConfs = std::unordered_map<CpuId, EventConfs>;
 
 /// Container for all types and instances of PMUs in the system.
 class PmuDeviceManager {