Skip to content

Commit

Permalink
Expose average proxy thread CPU stat
Browse files Browse the repository at this point in the history
Summary: Tuning the number of proxy threads in prod is difficult because we lack insight into thread utilization. This diff exposes the average proxy thread CPU usage, measured over a configurable time window (the `proxy_cpu_monitor_ms` option).

Reviewed By: alikhtarov

Differential Revision: D52932252

fbshipit-source-id: 928536eb6663d719a564e2b21c3a54f34605b9fa
  • Loading branch information
Stuart Clark authored and facebook-github-bot committed Jan 24, 2024
1 parent 36d2516 commit 7d8d7c6
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 0 deletions.
6 changes: 6 additions & 0 deletions mcrouter/CarbonRouterInstance-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,11 @@ CarbonRouterInstance<RouterInfo>::spinUp() {
}
}

cpuStatsWorker_ = std::make_unique<CpuStatsWorker>(
std::chrono::milliseconds(opts_.proxy_cpu_monitor_ms),
functionScheduler(),
getIOThreadPool());

configuredFromDisk_.store(configuringFromDisk, std::memory_order_relaxed);

startTime_.store(time(nullptr), std::memory_order_relaxed);
Expand Down Expand Up @@ -371,6 +376,7 @@ CarbonRouterInstance<RouterInfo>::CarbonRouterInstance(

template <class RouterInfo>
void CarbonRouterInstance<RouterInfo>::shutdownImpl() noexcept {
resetCpuStatsWorker();
joinAuxiliaryThreads();
proxyEvbs_.clear();
resetMetadata();
Expand Down
37 changes: 37 additions & 0 deletions mcrouter/CarbonRouterInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,43 @@ void freeAllRouters() {
manager->freeAllMcrouters();
}
}

/**
 * Builds a worker that samples proxy-thread CPU usage on a schedule.
 *
 * @param timeIntervalMs  Sampling period. A value <= 0 disables monitoring:
 *                        no function is registered with the scheduler.
 * @param scheduler       Scheduler that runs the periodic sampling task; a
 *                        copy of the shared_ptr is retained for cancellation.
 * @param proxyThreads    Thread pool whose CPU time is sampled. Held by
 *                        reference, so it must outlive this worker.
 */
CpuStatsWorker::CpuStatsWorker(
    std::chrono::milliseconds timeIntervalMs,
    std::shared_ptr<folly::FunctionScheduler> scheduler,
    const folly::IOThreadPoolExecutorBase& proxyThreads)
    : scheduler_(scheduler),
      startMs_(std::chrono::steady_clock::time_point::min()),
      proxyThreads_(proxyThreads) {
  // Monitoring disabled: leave the scheduler untouched.
  if (timeIntervalMs.count() <= 0) {
    return;
  }

  // Delay before the first sample is taken.
  const std::chrono::milliseconds initialDelay{kWorkerStartDelayMs_};

  // The task captures `this`; the destructor cancels it by name.
  scheduler_->addFunction(
      [this]() { calculateCpuStats(); },
      timeIntervalMs,
      kCpuStatsWorkerName_,
      initialDelay);
}

/**
 * Unregisters the sampling task from the scheduler.
 *
 * The task registered in the constructor captures `this`, so it is cancelled
 * by name here before the members are destroyed. The call is made even when
 * monitoring was disabled and no task was ever registered.
 * NOTE(review): presumably cancelFunctionAndWait blocks until an in-flight run
 * finishes and is a harmless no-op for an unknown name - confirm against folly.
 */
CpuStatsWorker::~CpuStatsWorker() {
scheduler_->cancelFunctionAndWait(kCpuStatsWorkerName_);
}

/**
 * Takes one CPU sample and refreshes the published average.
 *
 * Computes the CPU time consumed by the proxy thread pool since the previous
 * sample, divided by (elapsed wall time * number of threads), as a percentage.
 * The first invocation only records a baseline; no average is published until
 * a second sample is available.
 *
 * The result is clamped to [0, 100]. A sample is skipped (previous average is
 * kept) when no wall time has elapsed or the pool reports zero threads, which
 * would otherwise divide by zero.
 */
void CpuStatsWorker::calculateCpuStats() {
  const auto now = std::chrono::steady_clock::now();
  const auto currUsedCpuTime = proxyThreads_.getUsedCpuTime();

  if (usedCpuTime_.count() > 0 &&
      startMs_ > std::chrono::steady_clock::time_point::min()) {
    const auto timeDeltaNs =
        std::chrono::duration_cast<std::chrono::nanoseconds>(now - startMs_)
            .count();
    const auto cpuDeltaNs = (currUsedCpuTime - usedCpuTime_).count();
    const auto threadCount = proxyThreads_.numThreads();

    // Guard the denominator: both factors must be strictly positive.
    if (timeDeltaNs > 0 && threadCount > 0) {
      size_t pct = 0;
      // The aggregate CPU time can shrink between samples (e.g. if the set of
      // threads in the pool changes); treat that as 0% instead of letting a
      // negative value wrap around in unsigned arithmetic.
      if (cpuDeltaNs > 0) {
        const auto denom = static_cast<unsigned long long>(timeDeltaNs) *
            static_cast<unsigned long long>(threadCount);
        const auto raw =
            100ULL * static_cast<unsigned long long>(cpuDeltaNs) / denom;
        // Measurement jitter can push the ratio slightly above 100.
        pct = raw > 100ULL ? 100 : static_cast<size_t>(raw);
      }
      avgCpu_ = pct;
    }
  }

  // Store values for next iteration
  usedCpuTime_ = currUsedCpuTime;
  startMs_ = now;
}
} // namespace mcrouter
} // namespace memcache
} // namespace facebook
44 changes: 44 additions & 0 deletions mcrouter/CarbonRouterInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,39 @@ class McrouterManager;

class ProxyThread;

/**
 * Class to read CPU metrics of the mcrouter proxy threads
 * based on a FunctionScheduler.
 *
 * When constructed with a positive interval, a periodic task is registered on
 * the scheduler; it is cancelled again in the destructor. The task captures
 * `this`, so instances are neither copyable nor movable.
 *
 * The average is written by the scheduled task and read through getAvgCpu(),
 * which callers may invoke from a different thread; it is therefore stored in
 * an atomic.
 */
class CpuStatsWorker {
 public:
  /**
   * @param timeIntervalMs  Sampling period; <= 0 disables sampling.
   * @param scheduler       Scheduler used to run the periodic sampling task.
   * @param proxyThreads    Thread pool to measure. Stored by reference and
   *                        must outlive this object.
   */
  CpuStatsWorker(
      std::chrono::milliseconds timeIntervalMs,
      std::shared_ptr<folly::FunctionScheduler> scheduler,
      const folly::IOThreadPoolExecutorBase& proxyThreads);

  ~CpuStatsWorker();

  // The scheduled callback holds a pointer to this object.
  CpuStatsWorker(const CpuStatsWorker&) = delete;
  CpuStatsWorker& operator=(const CpuStatsWorker&) = delete;
  CpuStatsWorker(CpuStatsWorker&&) = delete;
  CpuStatsWorker& operator=(CpuStatsWorker&&) = delete;

  /*
   * Returns a size_t in range [0,100] measuring the average CPU
   * of the proxy threads over the specified timeIntervalMs window.
   * Returns 0 until the first full window has been measured.
   */
  size_t getAvgCpu() const {
    return avgCpu_.load(std::memory_order_relaxed);
  }

 private:
  // Latest published average; written by calculateCpuStats().
  std::atomic<size_t> avgCpu_{0};
  std::shared_ptr<folly::FunctionScheduler> scheduler_;
  // Time point of the previous sample (time_point::min() before the first).
  std::chrono::steady_clock::time_point startMs_;
  // Pool CPU time observed at the previous sample.
  std::chrono::nanoseconds usedCpuTime_{0};
  const folly::IOThreadPoolExecutorBase& proxyThreads_;
  // Delay before the first scheduled sample, in milliseconds.
  static constexpr int kWorkerStartDelayMs_ = 1000;
  // Name under which the task is registered with (and cancelled from) the
  // scheduler.
  static constexpr std::string_view kCpuStatsWorkerName_ = "cpu-stats_worker";

  void calculateCpuStats();
};

/**
* A single mcrouter instance. A mcrouter instance has a single config,
* but might run across multiple threads.
Expand Down Expand Up @@ -144,6 +177,10 @@ class CarbonRouterInstance

ProxyBase* getProxyBase(size_t index) const override final;

/**
 * @return the average proxy thread CPU utilization reported by the
 *         CpuStatsWorker, or 0 when no worker exists. The worker is only
 *         created in spinUp() and is released by resetCpuStatsWorker(), so
 *         the pointer may legitimately be null here.
 */
size_t getProxyCpu() const override final {
  return cpuStatsWorker_ ? cpuStatsWorker_->getAvgCpu() : 0;
}

/**
* @return nullptr if index is >= opts.num_proxies,
* pointer to the proxy otherwise.
Expand All @@ -165,6 +202,10 @@ class CarbonRouterInstance
return *proxyThreads_;
}

/**
 * Destroys the CPU stats worker, if any. The worker's destructor unregisters
 * its sampling task from the function scheduler.
 */
void resetCpuStatsWorker() {
  cpuStatsWorker_ = nullptr;
}

CarbonRouterInstance(const CarbonRouterInstance&) = delete;
CarbonRouterInstance& operator=(const CarbonRouterInstance&) = delete;
CarbonRouterInstance(CarbonRouterInstance&&) noexcept = delete;
Expand Down Expand Up @@ -199,6 +240,9 @@ class CarbonRouterInstance
std::vector<std::unique_ptr<folly::VirtualEventBase>> proxyEvbs_;
std::shared_ptr<folly::IOThreadPoolExecutorBase> proxyThreads_;

// Worker thread to calculate avg cpu across proxy threads
std::unique_ptr<CpuStatsWorker> cpuStatsWorker_;

/**
* Indicates if evbs/IOThreadPoolExecutor has been created by McRouter or
* passed as an argument in construction.
Expand Down
6 changes: 6 additions & 0 deletions mcrouter/CarbonRouterInstanceBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ class CarbonRouterInstanceBase {
*/
virtual ProxyBase* getProxyBase(size_t index) const = 0;

/**
 * Returns a size_t in range [0,100] measuring the average CPU
 * utilization of the proxy threads over the most recent monitoring
 * window (configured through the proxy_cpu_monitor_ms option).
 */
virtual size_t getProxyCpu() const = 0;

/**
* Bump and return the index of the next proxy to be used by clients.
*/
Expand Down
9 changes: 9 additions & 0 deletions mcrouter/mcrouter_options_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,15 @@ MCROUTER_OPTION_INTEGER(
no_short,
"1 in S non-error connection samples will be logged")

// Sampling period, in milliseconds, for the proxy thread CPU monitor.
// Defaults to 0, in which case no sampling task is scheduled and the
// proxy_cpu stat is not populated.
MCROUTER_OPTION_INTEGER(
uint32_t,
proxy_cpu_monitor_ms,
0,
"proxy-cpu-monitor-ms",
no_short,
"Measure proxy CPU utilization every proxy_cpu_monitor_ms milliseconds. "
"0 means disabled.")

#ifdef ADDITIONAL_OPTIONS_FILE
#include ADDITIONAL_OPTIONS_FILE
#endif
Expand Down
2 changes: 2 additions & 0 deletions mcrouter/stat_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ STAT(asynclog_duration_us, stat_double, 0, .dbl = 0.0)
STAT(axon_proxy_duration_us, stat_double, 0, .dbl = 0.0)
// Number of proxy threads
STUI(num_proxies, 0, 1)
// Average CPU utilization across proxy threads, as a percentage (0-100).
// Only set when the proxy_cpu_monitor_ms option is greater than 0.
STUI(proxy_cpu, 0, 1)
// Proxy requests that are currently being routed.
STUI(proxy_reqs_processing, 0, 1)
// Proxy requests queued up and not routed yet
Expand Down
4 changes: 4 additions & 0 deletions mcrouter/stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,10 @@ void prepare_stats(CarbonRouterInstanceBase& router, stat_t* stats) {
if (router.opts().num_proxies > 0) {
// Set the number of proxy threads
stat_set(stats, num_proxies_stat, router.opts().num_proxies);
// Set avg proxy cpu
if (router.opts().proxy_cpu_monitor_ms > 0) {
stat_set(stats, proxy_cpu_stat, router.getProxyCpu());
}
stat_div(stats, duration_us_stat, router.opts().num_proxies);
stat_div(stats, duration_get_us_stat, router.opts().num_proxies);
stat_div(stats, duration_update_us_stat, router.opts().num_proxies);
Expand Down

0 comments on commit 7d8d7c6

Please sign in to comment.