From b139b5090febf6b8d835a2094f2617d91f8af1af Mon Sep 17 00:00:00 2001
From: Valentin Andrei
Date: Tue, 9 Jul 2024 08:03:45 +0000
Subject: [PATCH] [pytorch] Name threads in thread pools for better debugging
 (#130270)

Threads inside the thread pools are not named, so they inherit the main
process name or the name of the first thread. In our case if we set
`pt_main_thread` as the thread name when a thread does `import torch`,
this name will be inherited by all the threads in the created pools.

This PR names the threads in the pools I was able to find. There are
other pools created, like OpenMP ones and we need to follow-up on those.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130270
Approved by: https://github.com/d4l3k, https://github.com/albanD
---
 c10/core/thread_pool.cpp                               | 2 ++
 caffe2/utils/threadpool/WorkersPool.h                  | 5 ++++-
 torch/csrc/distributed/autograd/engine/dist_engine.cpp | 2 ++
 torch/csrc/lazy/core/thread_pool.cpp                   | 6 +++++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/c10/core/thread_pool.cpp b/c10/core/thread_pool.cpp
index 10fdd9d9f769b..dfe6cfaeb3343 100644
--- a/c10/core/thread_pool.cpp
+++ b/c10/core/thread_pool.cpp
@@ -1,5 +1,6 @@
 #include <c10/core/thread_pool.h>
 #include <c10/util/Logging.h>
+#include <c10/util/thread_name.h>
 #if !defined(__powerpc__) && !defined(__s390x__)
 #include <cpuid.h>
 #endif
@@ -41,6 +42,7 @@ ThreadPool::ThreadPool(
       numa_node_id_(numa_node_id) {
   for (std::size_t i = 0; i < threads_.size(); ++i) {
     threads_[i] = std::thread([this, i, init_thread]() {
+      c10::setThreadName("pt_thread_pool");
       if (init_thread) {
         init_thread();
       }
diff --git a/caffe2/utils/threadpool/WorkersPool.h b/caffe2/utils/threadpool/WorkersPool.h
index 23a72b02465ea..2f964712b1580 100644
--- a/caffe2/utils/threadpool/WorkersPool.h
+++ b/caffe2/utils/threadpool/WorkersPool.h
@@ -232,7 +232,10 @@ class alignas(kGEMMLOWPCacheLineSize) Worker {
       : task_(nullptr),
         state_(State::ThreadStartup),
         counter_to_decrement_when_ready_(counter_to_decrement_when_ready) {
-    thread_ = std::make_unique<std::thread>([this]() { this->ThreadFunc(); });
+    thread_ = std::make_unique<std::thread>([this]() {
+      c10::setThreadName("pt_thread_pool");
+      this->ThreadFunc();
+    });
   }
 
   ~Worker() {
diff --git a/torch/csrc/distributed/autograd/engine/dist_engine.cpp b/torch/csrc/distributed/autograd/engine/dist_engine.cpp
index d37e695c77194..c213b88abae94 100644
--- a/torch/csrc/distributed/autograd/engine/dist_engine.cpp
+++ b/torch/csrc/distributed/autograd/engine/dist_engine.cpp
@@ -4,6 +4,7 @@
 #include <c10/core/Event.h>
 #include <c10/util/DeadlockDetection.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/autograd/functions/accumulate_grad.h>
 #include <torch/csrc/autograd/input_buffer.h>
 #include <torch/csrc/distributed/autograd/context/container.h>
@@ -76,6 +77,7 @@ class DistAccumulateGradCaptureHook
 
 void DistEngine::globalCpuThread(
     const std::shared_ptr<ReadyQueue>& ready_queue) {
+  c10::setThreadName("pt_dist_engine");
   while (true) {
     NodeTask task = ready_queue->pop();
     if (task.isShutdownTask_) {
diff --git a/torch/csrc/lazy/core/thread_pool.cpp b/torch/csrc/lazy/core/thread_pool.cpp
index fee4cd47dd7ad..de9a6d8ea4dd4 100644
--- a/torch/csrc/lazy/core/thread_pool.cpp
+++ b/torch/csrc/lazy/core/thread_pool.cpp
@@ -2,6 +2,7 @@
 
 #include <c10/util/Logging.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/lazy/core/config.h>
 #include <torch/csrc/lazy/core/metrics.h>
 
@@ -21,7 +22,10 @@ class ThreadPool {
     threads_.reserve(num_threads);
     for (const auto i : c10::irange(num_threads)) {
       (void)i; // Suppress unused variable warning
-      threads_.emplace_back([this]() { Worker(); });
+      threads_.emplace_back([this]() {
+        c10::setThreadName("pt_thread_pool");
+        Worker();
+      });
     }
   }