From 2ac8a91fbe0108144dafb04cff4049da3c288a4d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 13 Sep 2024 10:08:55 +0300
Subject: [PATCH 1/2] cmake : do not hide GGML options

ggml-ci
---
 CMakeLists.txt      |  4 ++--
 ggml/CMakeLists.txt | 13 +++++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2440193138013..c79852ab5e650 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,11 +82,11 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
 
 # change the default for these ggml options
 if (NOT DEFINED GGML_LLAMAFILE)
-    set(GGML_LLAMAFILE ON)
+    set(GGML_LLAMAFILE_DEFAULT ON)
 endif()
 
 if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
-    set(GGML_CUDA_USE_GRAPHS ON)
+    set(GGML_CUDA_USE_GRAPHS_DEFAULT ON)
 endif()
 
 # transition helpers
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 532534bcb97e3..31c342c16fb83 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -56,6 +56,15 @@ else()
     set(GGML_NATIVE_DEFAULT ON)
 endif()
 
+# defaults
+if (NOT GGML_LLAMAFILE_DEFAULT)
+    set(GGML_LLAMAFILE_DEFAULT OFF)
+endif()
+
+if (NOT GGML_CUDA_USE_GRAPHS_DEFAULT)
+    set(GGML_CUDA_USE_GRAPHS_DEFAULT OFF)
+endif()
+
 # general
 option(GGML_STATIC "ggml: static link libraries" OFF)
 option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
 option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
 set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
     "ggml: BLAS library vendor")
-option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)
+option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
 
 option(GGML_CUDA "ggml: use CUDA" OFF)
 option(GGML_MUSA "ggml: use MUSA" OFF)
@@ -127,7 +136,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
-option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
+option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_USE_GRAPHS_DEFAULT})
 
 option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)

From f80e679696ef733c6d431a186b4eaa2d1dbc9053 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 16 Sep 2024 09:00:43 +0300
Subject: [PATCH 2/2] build : rename flag GGML_CUDA_USE_GRAPHS ->
 GGML_CUDA_GRAPHS for consistency

ggml-ci
---
 CMakeLists.txt          | 4 ++--
 Makefile                | 2 +-
 ggml/CMakeLists.txt     | 6 +++---
 ggml/src/CMakeLists.txt | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c79852ab5e650..973907819d0d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,8 +85,8 @@ if (NOT DEFINED GGML_LLAMAFILE)
     set(GGML_LLAMAFILE_DEFAULT ON)
 endif()
 
-if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
-    set(GGML_CUDA_USE_GRAPHS_DEFAULT ON)
+if (NOT DEFINED GGML_CUDA_GRAPHS)
+    set(GGML_CUDA_GRAPHS_DEFAULT ON)
 endif()
 
 # transition helpers
diff --git a/Makefile b/Makefile
index cb5ff9f9dc9af..f922f7083b7c9 100644
--- a/Makefile
+++ b/Makefile
@@ -619,7 +619,7 @@ ifdef GGML_CUDA
 		CUDA_PATH ?= /usr/local/cuda
 	endif
 
-	MK_CPPFLAGS  += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
+	MK_CPPFLAGS  += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 	MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
 	MK_NVCCFLAGS += -use_fast_math
 endif # GGML_MUSA
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 31c342c16fb83..89fdf9d1c11ed 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -61,8 +61,8 @@ if (NOT GGML_LLAMAFILE_DEFAULT)
     set(GGML_LLAMAFILE_DEFAULT OFF)
 endif()
 
-if (NOT GGML_CUDA_USE_GRAPHS_DEFAULT)
-    set(GGML_CUDA_USE_GRAPHS_DEFAULT OFF)
+if (NOT GGML_CUDA_GRAPHS_DEFAULT)
+    set(GGML_CUDA_GRAPHS_DEFAULT OFF)
 endif()
 
 # general
@@ -136,7 +136,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
-option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_USE_GRAPHS_DEFAULT})
+option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
 
 option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 11b877e194e07..042ea9b77cc17 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -329,7 +329,7 @@ if (GGML_CUDA)
     add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
 
-    if (GGML_CUDA_USE_GRAPHS)
+    if (GGML_CUDA_GRAPHS)
         add_compile_definitions(GGML_CUDA_USE_GRAPHS)
     endif()
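
Note: both patches implement one pattern: a parent project seeds a GGML_*_DEFAULT variable before ggml is configured, and ggml's option() declarations consume that default without clobbering a value the user set explicitly. Below is a minimal sketch (not part of the patches) of a consumer project using the renamed flag; the project name and the assumption that ggml is vendored as a subdirectory are hypothetical, only the GGML_CUDA_GRAPHS* variables come from the patches.

    cmake_minimum_required(VERSION 3.14)
    project(consumer C CXX)

    # Seed ggml's default only when the user has not chosen a value
    # explicitly (e.g. via -DGGML_CUDA_GRAPHS=OFF on the command line).
    if (NOT DEFINED GGML_CUDA_GRAPHS)
        set(GGML_CUDA_GRAPHS_DEFAULT ON)
    endif()

    # ggml/CMakeLists.txt picks the seeded default up in:
    #   option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
    add_subdirectory(ggml)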