Skip to content

Commit

Permalink
[Backend] Bump TRTLLM to v.0.17.0 (#2991)
Browse files Browse the repository at this point in the history
* backend(trtllm): bump TRTLLM to v.0.17.0

* backend(trtllm): forgot to bump Dockerfile

* backend(trtllm): use arg instead of env

* backend(trtllm): use correct library reference decoder_attention_src

* backend(trtllm): link against decoder_attention_{0|1}

* backend(trtllm): build against gcc-14 with cuda12.8

* backend(trtllm): use return value optimization flag as an error if available

* backend(trtllm): make sure we escalate all warnings to errors on the backend impl in debug mode

* backend(trtllm): link against CUDA 12.8
  • Loading branch information
mfuntowicz authored Feb 6, 2025
1 parent 36223f8 commit 856709d
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 19 deletions.
14 changes: 9 additions & 5 deletions Dockerfile_trtllm
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real;100-real;120-real"
ARG cuda_base=12.8.0
ARG build_type=release
ARG ompi_version=4.1.7
ARG sccache_gha_enabled=off
ARG actions_cache_url=""
ARG actions_runtime_token=""


# CUDA dependent dependencies resolver stage
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
FROM nvidia/cuda:${cuda_base}-cudnn-devel-ubuntu24.04 AS cuda-builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
Expand Down Expand Up @@ -98,14 +100,16 @@ COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

ENV RUSTC_WRAPPER=sccache
ENV CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX
RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
RUN export CC=gcc-14 \
export CXX=g++-14 \
export CMAKE_C_COMPILER_LAUNCHER=sccache && \
export CMAKE_CXX_COMPILER_LAUNCHER=sccache && \
export CMAKE_CUDA_COMPILER_LAUNCHER=sccache && \
mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
sccache --show-stats

FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
FROM nvidia/cuda:${cuda_base}-cudnn-runtime-ubuntu24.04 AS runtime
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
pipx ensurepath && \
Expand All @@ -124,7 +128,7 @@ COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
COPY --from=tgi-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher

# This is used only for the CI/CD
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
FROM nvidia/cuda:${cuda_base}-cudnn-runtime-ubuntu24.04 AS ci-runtime
RUN apt update && apt install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
pipx ensurepath && \
Expand Down
7 changes: 5 additions & 2 deletions backends/trtllm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ target_link_libraries(tgi_trtllm_backend_impl PRIVATE tensorrt_llm nvinfer_plugi

# This installs all the artifacts in CMAKE_INSTALL_PREFIX under include/ lib/ bin/ to make them easy to link against / find again
install(TARGETS tgi_trtllm_backend_impl)
install(TARGETS tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention executorWorker)
#install(TARGETS cutlass_src fb_gemm_src fpA_intB_gemm_src gemm_swiglu_sm90_src kernels_src)
install(TARGETS decoder_attention_0 decoder_attention_1)
install(TARGETS tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention_src executorWorker)
install(FILES ${TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH} TYPE LIB)
if (NOT ${TGI_TRTLLM_BACKEND_DEBUG})
install(FILES ${TRTLLM_EXECUTOR_STATIC_LIBRARY_PATH} TYPE LIB)
Expand All @@ -82,8 +84,9 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS} AND CMAKE_BUILD_TYPE MATCHES "Debug")
check_cxx_compiler_flag("-Wnrvo" COMPILER_SUPPORT_WARNING_ON_NVRO)
if (${COMPILER_SUPPORT_WARNING_ON_NVRO})
message(STATUS "Enabling non-NVRO detection")
target_compile_options(tgi_trtllm_backend_impl "-Wnvro")
target_compile_options(tgi_trtllm_backend_impl PRIVATE -Wnrvo)
endif ()
target_compile_options(tgi_trtllm_backend_impl PRIVATE -Wall)

cmake_path(GET TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH PARENT_PATH TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH)
message(STATUS "Adding linking path: ${TRTLLM_NVRTC_WRAPPER_PARENT_LIBRARY_PATH}")
Expand Down
7 changes: 4 additions & 3 deletions backends/trtllm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::sync::LazyLock;

const ADDITIONAL_BACKEND_LINK_LIBRARIES: [&str; 1] = ["spdlog"];
const CUDA_ARCH_LIST: Option<&str> = option_env!("CUDA_ARCH_LIST");
const CUDA_REQUIRED_VERSION: &str = "12.6";
const CUDA_REQUIRED_VERSION: &str = "12.8";
const MPI_REQUIRED_VERSION: &str = "4.1";
const INSTALL_PREFIX: Option<&str> = option_env!("CMAKE_INSTALL_PREFIX");
const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
Expand All @@ -25,11 +25,12 @@ const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
// Dependencies
const BACKEND_DEPS: &str = "tgi_trtllm_backend_impl";
const CUDA_TRANSITIVE_DEPS: [&str; 4] = ["cuda", "cudart", "cublas", "nvidia-ml"];
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 4] = [
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 5] = [
("dylib", "tensorrt_llm"),
("dylib", "tensorrt_llm_nvrtc_wrapper"),
("dylib", "nvinfer_plugin_tensorrt_llm"),
("dylib", "decoder_attention"),
("dylib", "decoder_attention_0"),
("dylib", "decoder_attention_1"),
];

macro_rules! probe {
Expand Down
2 changes: 1 addition & 1 deletion backends/trtllm/cmake/trtllm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ find_package(Python3 REQUIRED Interpreter)
fetchcontent_declare(
trtllm
GIT_REPOSITORY https://github.com/nvidia/TensorRT-LLM.git
GIT_TAG v0.16.0
GIT_TAG v0.17.0
GIT_SHALLOW ON
DOWNLOAD_EXTRACT_TIMESTAMP
)
Expand Down
16 changes: 8 additions & 8 deletions backends/trtllm/scripts/install_tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

set -ex

TRT_VER_BASE="10.7.0"
TRT_VER_FULL="${TRT_VER_BASE}.23"
CUDA_VER="12.6"
CUDNN_VER="9.5.0.50-1"
NCCL_VER="2.22.3-1+cuda12.6"
CUBLAS_VER="12.6.3.3-1"
NVRTC_VER="12.6.77-1"
TRT_VER_BASE="10.8.0"
TRT_VER_FULL="${TRT_VER_BASE}.43"
CUDA_VER="12.8"
CUDNN_VER="9.7.0.66-1"
NCCL_VER="2.25.1-1+cuda${CUDA_VER}"
CUBLAS_VER="${CUDA_VER}.3.14-1"
NVRTC_VER="${CUDA_VER}.61-1"

for i in "$@"; do
case $i in
Expand Down Expand Up @@ -73,7 +73,7 @@ install_centos_requirements() {
install_tensorrt() {
#PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
#PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")
TRT_CUDA_VERSION="12.6"
TRT_CUDA_VERSION="12.8"

if [ -z "$RELEASE_URL_TRT" ];then
ARCH=${TRT_TARGETARCH}
Expand Down

0 comments on commit 856709d

Please sign in to comment.