diff --git a/cpp/CMake/Buildjemalloc.cmake b/cpp/CMake/Buildjemalloc.cmake index 5491bd829533..67a0cc5f1167 100644 --- a/cpp/CMake/Buildjemalloc.cmake +++ b/cpp/CMake/Buildjemalloc.cmake @@ -22,7 +22,7 @@ macro(build_jemalloc) if(DEFINED ENV{GLUTEN_JEMALLOC_URL}) set(JEMALLOC_SOURCE_URL "$ENV{GLUTEN_JEMALLOC_URL}") else() - set(JEMALLOC_BUILD_VERSION "5.2.1") + set(JEMALLOC_BUILD_VERSION "5.3.0") set(JEMALLOC_SOURCE_URL "https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_BUILD_VERSION}/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" @@ -47,6 +47,8 @@ macro(build_jemalloc) # for dynamically linking. "--without-export" "--disable-cxx" "--disable-libdl" + # Enable heap profiling and leak detection functionality. + "--enable-prof" # For fixing an issue when loading native lib: cannot allocate memory in # static TLS block. "--disable-initial-exec-tls" diff --git a/docs/developers/HowTo.md b/docs/developers/HowTo.md index c4b1a03a36d8..22ad3e30efc7 100644 --- a/docs/developers/HowTo.md +++ b/docs/developers/HowTo.md @@ -122,6 +122,13 @@ gdb ${GLUTEN_HOME}/cpp/build/releases/libgluten.so 'core-Executor task l-2000883 ``` - the `core-Executor task l-2000883-1671542526` represents the core file name. +# How to use jemalloc for Gluten native engine + +Currently, we have no dedicated memory allocator implemented by jemalloc. User can set environment variable `LD_PRELOAD` for lib jemalloc +to let it override the corresponding C standard functions entirely. It may help alleviate OOM issues. + +`spark.executorEnv.LD_PRELOAD=/path/to/libjemalloc.so` + # How to run TPC-H on Velox backend Now, both Parquet and DWRF format files are supported, related scripts and files are under the directory of `${GLUTEN_HOME}/backends-velox/workload/tpch`. 
diff --git a/docs/developers/ProfileMemoryOfGlutenWithVelox.md b/docs/developers/ProfileMemoryOfGlutenWithVelox.md index 480755cb99c5..1f57f809218c 100644 --- a/docs/developers/ProfileMemoryOfGlutenWithVelox.md +++ b/docs/developers/ProfileMemoryOfGlutenWithVelox.md @@ -5,13 +5,14 @@ nav_order: 8 has_children: true parent: /developer-overview/ --- -Gluten offloads most of computation to native engine. We can use [gperftools](https://github.com/gperftools/gperftools) or [jemalloc](https://github.com/jemalloc/jemalloc) to analyze the offheap memory and cpu profile. +Gluten offloads most of Spark SQL execution to native engine. We can use [gperftools](https://github.com/gperftools/gperftools) or [jemalloc](https://github.com/jemalloc/jemalloc) +to analyze the offheap memory and cpu profile. -# Profiling using gperftools +# Profile with gperftools `gperftools` is a collection of a high-performance multi-threaded malloc() implementation, plus some pretty nifty performance analysis -tools, see more: https://github.com/gperftools/gperftools/wiki +tools, see more: https://github.com/gperftools/gperftools/wiki. ## Build and install gperftools @@ -29,10 +30,10 @@ Then we can find the tcmalloc libraries in `$GPERFTOOLS_HOME/.lib`. ## Run Gluten with gperftools -Use `--file` or `spark.files` to upload tcmalloc library. +Configure `--files` or `spark.files` for Spark. ``` ---file /path/to/gperftools/libtcmalloc_and_profiler.so +--files /path/to/gperftools/libtcmalloc_and_profiler.so or spark.files /path/to/gperftools/libtcmalloc_and_profiler.so ``` @@ -48,14 +49,14 @@ spark.executorEnv.LD_PRELOAD ./libtcmalloc_and_profiler.so spark.executorEnv.HEAPPROFILE /tmp/gluten_heap_perf_${CONTAINER_ID} ``` -Finally, profiling files starting with `/tmp/gluten_heap_perf_${CONTAINER_ID}` will be generated in each spark executor. +Finally, profiling files prefixed with `/tmp/gluten_heap_perf_${CONTAINER_ID}` will be generated for each spark executor. 
-## Analyze output profiles +## Analyze profiling output -Prepare the required native libraries. We can extract the gluten and velox libraries from gluten bundle jar. (Maybe also need dependency libraries for non-static compilation) +Prepare the required native libraries. Assume static build is used for Gluten, so there is no other shared dependency libs. ```bash -jar xf gluten-velox-bundle-spark3.5_2.12-centos_7_x86_64-1.2.0.jar libvelox.so libgluten.so +jar xf gluten-velox-bundle-spark3.5_2.12-centos_7_x86_64-1.2.0.jar relative/path/to/libvelox.so relative/path/to/libgluten.so mv libvelox.so libgluten.so /path/to/gluten_lib_prefix ``` @@ -82,9 +83,11 @@ Result like: **\*\*** Get more help from https://github.com/gperftools/gperftools/wiki#documentation. -# Profiling using jemalloc +# Profile with jemalloc -`jemalloc` is a general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. We can also use it to analyze Gluten performance. Getting Started with `jemalloc`: https://github.com/jemalloc/jemalloc/wiki/Getting-Started. +`jemalloc` is a general purpose malloc(3) implementation that emphasizes fragmentation +avoidance and scalable concurrency support. We can also use it to analyze Gluten performance. +Getting Started with `jemalloc`: https://github.com/jemalloc/jemalloc/wiki/Getting-Started. ## Build and install jemalloc @@ -99,10 +102,10 @@ Then we can find the jemalloc library in `$JEMALLOC_HOME/.lib`. ## Run Gluten with jemalloc -Use `--file` or `spark.files` to upload jemalloc library. +Configure `--files` or `spark.files` for Spark. 
``` ---file /path/to/jemalloc/libjemalloc.so +--files /path/to/jemalloc/libjemalloc.so or spark.files /path/to/jemalloc/libjemalloc.so ``` @@ -114,14 +117,14 @@ spark.executorEnv.LD_PRELOAD ./libjemalloc.so spark.executorEnv.MALLOC_CONF prof:true,lg_prof_interval:30,prof_prefix:/tmp/gluten_heap_perf ``` -Finally, profiling files starting with `/tmp/gluten_heap_perf.${PID}` will be generated in each spark executor. +Finally, profiling files prefixed with `/tmp/gluten_heap_perf.${PID}` will be generated for each spark executor. -## Analyze output profiles +## Analyze profiling output -Prepare the required native libraries. We can extract the gluten and velox libraries from gluten bundle jar. (Maybe also need dependency libraries for non-static compilation) +Prepare the required native libraries. Assume static build is used for Gluten, so there is no other shared dependency libs. ```bash -jar xf gluten-velox-bundle-spark3.5_2.12-centos_7_x86_64-1.2.0.jar libvelox.so libgluten.so +jar xf gluten-velox-bundle-spark3.5_2.12-centos_7_x86_64-1.2.0.jar relative/path/to/libvelox.so relative/path/to/libgluten.so mv libvelox.so libgluten.so /path/to/gluten_lib_prefix ``` diff --git a/docs/get-started/build-guide.md b/docs/get-started/build-guide.md index 3c0c521e2181..cbaad979fb90 100644 --- a/docs/get-started/build-guide.md +++ b/docs/get-started/build-guide.md @@ -14,7 +14,7 @@ Please set them via `--`, e.g. `--build_type=Release`. | build_tests | Build gluten cpp tests. | OFF | | build_examples | Build udf example. | OFF | | build_benchmarks | Build gluten cpp benchmarks. | OFF | -| enable_jemalloc_stats | Print jemalloc stats for debugging. | OFF | +| enable_jemalloc_stats | Print jemalloc stats for debugging. | OFF | | build_protobuf | Build protobuf lib. | OFF | | enable_qat | Enable QAT for shuffle data de/compression. | OFF | | enable_iaa | Enable IAA for shuffle data de/compression. | OFF |