From c2afc74675fe07542ecb01c314771777b9bd4a3c Mon Sep 17 00:00:00 2001 From: Katie Mancini Date: Tue, 27 Aug 2024 10:48:22 -0700 Subject: [PATCH] move client info into base for all samples Summary: I added client logging to the Watchman `dispatch_command` event. Watchman has a few different types of scuba events: - query_execute - sync_to_now - full_crawl - clock_test - saved_state - dropped - age_out Some of these events are associated with a specific client request, so I would like to include client information there as well. There are a few that are not related to a specific client event: - full_crawl - clock_test - dropped - age_out So I only intend to add client information to - query_execute - sync_to_now - saved_state Going to move these client attributes to the more generically used MetadataEventData instead of being on the event types for each event to avoid copy paste. Reviewed By: genevievehelsel Differential Revision: D61684457 fbshipit-source-id: bf4765780cfd09c7ff5a0a3f0ef56dcc10f58dfd --- eden/common/utils/ProcessInfoCache.cpp | 7 ++++ eden/common/utils/ProcessInfoCache.h | 8 +++++ .../utils/test/ProcessInfoCacheTest.cpp | 33 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/eden/common/utils/ProcessInfoCache.cpp b/eden/common/utils/ProcessInfoCache.cpp index 2b25227..06e6152 100644 --- a/eden/common/utils/ProcessInfoCache.cpp +++ b/eden/common/utils/ProcessInfoCache.cpp @@ -8,6 +8,7 @@ #include "eden/common/utils/ProcessInfoCache.h" #include +#include #include #include #include @@ -418,4 +419,10 @@ std::optional ProcessInfoCache::getProcessName(pid_t pid) { readProcessSimpleName(pid)}; } +/*static*/ std::string ProcessInfoCache::cleanProcessCommandline( + std::string processName) { + std::replace(processName.begin(), processName.end(), '\0', ' '); + return folly::rtrimWhitespace(processName).str(); +} + } // namespace facebook::eden diff --git a/eden/common/utils/ProcessInfoCache.h b/eden/common/utils/ProcessInfoCache.h index 142ae86..49aba56 100644 --- a/eden/common/utils/ProcessInfoCache.h +++ b/eden/common/utils/ProcessInfoCache.h @@ -163,6 +163,14 @@ class ProcessInfoCache { */ std::optional getProcessName(pid_t pid); + /** + * Commandlines (on linux anyways) use \0 instead of spaces to separate + * arguments. sl is often a command we are interested in. and sl also + * doed some funky commandline manipulation that causes a bunch of \0 to be + * on the end of their commandline. This will clean those off. + */ + static std::string cleanProcessCommandline(std::string process); + private: struct State { std::unordered_map> infos; diff --git a/eden/common/utils/test/ProcessInfoCacheTest.cpp b/eden/common/utils/test/ProcessInfoCacheTest.cpp index 1e8f9ad..39f5ba9 100644 --- a/eden/common/utils/test/ProcessInfoCacheTest.cpp +++ b/eden/common/utils/test/ProcessInfoCacheTest.cpp @@ -208,4 +208,37 @@ TEST(ProcessInfoCache, multipleLookups) { thread1.join(); thread2.join(); } + +TEST(ProcessInfoCache, testSlCommandlineCleaning) { + // Sapling does some commandline manipulation to name the background processes + // something like pfc[worker/XXXXXXXX]. But this causes the commanline to be + // full of null bytes. This is something we want to be filtered out in + // telemetry. + + // note to editor: we use the char *, size_t variant of the string constructor + // o.w the \0 will be interpreted as the end of the string! + auto sl_worker_raw_cmdline = std::string{ + "pfc[worker/663504]\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", + 112}; + + EXPECT_EQ( + "pfc[worker/663504]", + ProcessInfoCache::cleanProcessCommandline(sl_worker_raw_cmdline)); +} + +TEST(ProcessInfoCache, testBuckCommandlineCleaning) { + // Commandlines are \0 byte separated. We should turn those null bytes into + // spaces to make the commandlines easier to read in telemetry. + + // note to editor: we use the char *, size_t variant of the string constructor + // o.w the \0 will be interpreted as the end of the string! + auto buck2_raw_cmdline = std::string{ + "buck2d[fbsource]\u0000--isolation-dir\u0000v2\u0000daemon\u0000{\"buck_config\":\"somevalue\"}\u0000", + 70}; + + EXPECT_EQ( + "buck2d[fbsource] --isolation-dir v2 daemon {\"buck_config\":\"somevalue\"}", + ProcessInfoCache::cleanProcessCommandline(buck2_raw_cmdline)); +} + } // namespace facebook::eden