From 6995acdc063b1435cfa7cade4a81bc9f05b57e30 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Mon, 6 Nov 2023 14:48:06 +0800 Subject: [PATCH 1/3] add using gperftools doc for ch --- docs/developers/UsingGperftoolsInCH.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 docs/developers/UsingGperftoolsInCH.md diff --git a/docs/developers/UsingGperftoolsInCH.md b/docs/developers/UsingGperftoolsInCH.md new file mode 100644 index 000000000000..39c6484dffb3 --- /dev/null +++ b/docs/developers/UsingGperftoolsInCH.md @@ -0,0 +1,21 @@ +We need to use gperftools to find memory or CPU issues. That's what this document is about. + +## Install gperftools +Install gperftools as described in https://github.com/gperftools/gperftools. +We get the library and the command line tools. + +## Run Gluten with gperftools +For Spark on Yarn, we can change the submit script to run Gluten with gperftools. +Add the following to the submit script: +``` +export tcmalloc_path=/data2/zzb/gperftools-2.10/.libs/libtcmalloc_and_profiler.so # the path to the tcmalloc library +export LD_PRELOAD=$tcmalloc_path,libch.so # load the library in the driver +--files $tcmalloc_path # upload the library to the cluster +--conf spark.executorEnv.LD_PRELOAD=./libtcmalloc_and_profiler.so,libch.so # load the library in the executor +--conf spark.executorEnv.HEAPPROFILE=/tmp/gluten_heap_perf # set the heap profile path, you can change to CPUPROFILE for CPU profiling +``` + +## Analyze the result +We can get the result in the path we set in the previous step. For example, we can get the result in `/tmp/gluten_heap_perf`. 
We can use the following website to analyze the result: +https://gperftools.github.io/gperftools/heapprofile.html +https://gperftools.github.io/gperftools/cpuprofile.html \ No newline at end of file From c53691a574c29315f5016534339511a88eba60f4 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 8 Nov 2023 12:24:14 +0800 Subject: [PATCH 2/3] Update UsingGperftoolsInCH.md --- docs/developers/UsingGperftoolsInCH.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/developers/UsingGperftoolsInCH.md b/docs/developers/UsingGperftoolsInCH.md index 39c6484dffb3..ce51440f0555 100644 --- a/docs/developers/UsingGperftoolsInCH.md +++ b/docs/developers/UsingGperftoolsInCH.md @@ -4,6 +4,9 @@ We need using gpertools to find the memory or CPU issue. That's what this docume Install gperftools as described in https://github.com/gperftools/gperftools. We get the library and the command line tools. +## Compile libch.so +Disable jemalloc by setting `-DENABLE_JEMALLOC=OFF` in cpp-ch/CMakeLists.txt, then recompile libch.so. + ## Run Gluten with gperftools For Spark on Yarn, we can change the submit script to run Gluten with gperftools. Add the following to the submit script: @@ -18,4 +21,4 @@ export LD_PRELOAD=$tcmalloc_path,libch.so # load the library in the driver ## Analyze the result We can get the result in the path we set in the previous step. For example, we can get the result in `/tmp/gluten_heap_perf`. 
We can use the following website to analyze the result: https://gperftools.github.io/gperftools/heapprofile.html -https://gperftools.github.io/gperftools/cpuprofile.html \ No newline at end of file +https://gperftools.github.io/gperftools/cpuprofile.html From df5cd132bea7c4e18548d2460db91fec23f03645 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 8 Nov 2023 15:41:33 +0800 Subject: [PATCH 3/3] Update UsingGperftoolsInCH.md --- docs/developers/UsingGperftoolsInCH.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/developers/UsingGperftoolsInCH.md b/docs/developers/UsingGperftoolsInCH.md index ce51440f0555..f0d5c720b30b 100644 --- a/docs/developers/UsingGperftoolsInCH.md +++ b/docs/developers/UsingGperftoolsInCH.md @@ -18,6 +18,8 @@ export LD_PRELOAD=$tcmalloc_path,libch.so # load the library in the driver --conf spark.executorEnv.HEAPPROFILE=/tmp/gluten_heap_perf # set the heap profile path, you can change to CPUPROFILE for CPU profiling ``` +For the thrift server on a local machine, separate the libraries with a space instead of a comma: use `export LD_PRELOAD="$tcmalloc_path libch.so" # load the library in the driver` to preload the dynamic libraries. + ## Analyze the result We can get the result in the path we set in the previous step. For example, we can get the result in `/tmp/gluten_heap_perf`. We can use the following website to analyze the result: https://gperftools.github.io/gperftools/heapprofile.html