Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] main from mlc-ai:main #302

Merged
merged 13 commits into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
[submodule "3rdparty/stb"]
path = 3rdparty/stb
url = https://github.com/nothings/stb.git
[submodule "3rdparty/xgrammar"]
path = 3rdparty/xgrammar
url = https://github.com/mlc-ai/xgrammar.git
2 changes: 1 addition & 1 deletion 3rdparty/tvm
1 change: 1 addition & 0 deletions 3rdparty/xgrammar
Submodule xgrammar added at d4f57c
12 changes: 8 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,11 @@ set(MLC_LLM_RUNTIME_LINKER_LIB "")
set(TOKENZIER_CPP_PATH 3rdparty/tokenizers-cpp)
add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)


set(XGRAMMAR_PATH 3rdparty/xgrammar)
tvm_file_glob(GLOB_RECURSE MLC_LLM_SRCS cpp/*.cc)
tvm_file_glob(GLOB_RECURSE XGRAMMAR_SRCS ${XGRAMMAR_PATH}/cpp/*.cc)
list(FILTER XGRAMMAR_SRCS EXCLUDE REGEX "${XGRAMMAR_PATH}/cpp/pybind/.*\\.cc")
list(APPEND MLC_LLM_SRCS ${XGRAMMAR_SRCS})
add_library(mlc_llm_objs OBJECT ${MLC_LLM_SRCS})

set(
Expand All @@ -83,12 +86,14 @@ set(
set(MLC_LLM_COMPILE_DEFS ${MLC_LLM_COMPILE_DEFS} DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
set(MLC_LLM_COMPILE_DEFS ${MLC_LLM_COMPILE_DEFS} __STDC_FORMAT_MACROS=1)
set(MLC_LLM_COMPILE_DEFS ${MLC_LLM_COMPILE_DEFS} PICOJSON_USE_INT64)
set(MLC_LLM_COMPILE_DEFS ${MLC_LLM_COMPILE_DEFS} XGRAMMAR_ENABLE_LOG_DEBUG=0)

target_include_directories(mlc_llm_objs PRIVATE ${MLC_LLM_INCLUDES})
target_compile_definitions(mlc_llm_objs PRIVATE ${MLC_LLM_COMPILE_DEFS})
target_include_directories(mlc_llm_objs PRIVATE ${TOKENZIER_CPP_PATH}/include)
target_compile_definitions(mlc_llm_objs PRIVATE -DMLC_LLM_EXPORTS)
target_include_directories(mlc_llm_objs PRIVATE ${MLC_LLM_INCLUDES})
target_include_directories(mlc_llm_objs PRIVATE 3rdparty/stb)
target_include_directories(mlc_llm_objs PRIVATE ${TOKENZIER_CPP_PATH}/include)
target_include_directories(mlc_llm_objs PRIVATE ${XGRAMMAR_PATH}/include)

add_library(mlc_llm SHARED $<TARGET_OBJECTS:mlc_llm_objs>)
add_library(mlc_llm_static STATIC $<TARGET_OBJECTS:mlc_llm_objs>)
Expand Down Expand Up @@ -135,7 +140,6 @@ add_library(mlc_llm_module SHARED $<TARGET_OBJECTS:mlc_llm_objs>)
target_link_libraries(mlc_llm_module PUBLIC tvm)
target_link_libraries(mlc_llm_module PRIVATE tokenizers_cpp)


set_property(TARGET mlc_llm_module APPEND PROPERTY LINK_OPTIONS "${MLC_VISIBILITY_FLAG}")
set_property(TARGET mlc_llm APPEND PROPERTY LINK_OPTIONS "${MLC_VISIBILITY_FLAG}")

Expand Down
100 changes: 54 additions & 46 deletions cmake/gen_cmake_config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import namedtuple

Backend = namedtuple("Backend", ["name", "cmake_config_name", "prompt_str"])
Backend = namedtuple("Backend", ["name", "cmake_config_name", "prompt_str", "parent"])

if __name__ == "__main__":
tvm_home = "" # pylint: disable=invalid-name
Expand All @@ -13,65 +13,73 @@

cmake_config_str = f"set(TVM_SOURCE_DIR {tvm_home})\n"
cmake_config_str += "set(CMAKE_BUILD_TYPE RelWithDebInfo)\n"
cuda_backend = Backend("CUDA", "USE_CUDA", "Use CUDA? (y/n): ", None)
opencl_backend = Backend("OpenCL", "USE_OPENCL", "Use OpenCL? (y/n) ", None)
backends = [
Backend("CUDA", "USE_CUDA", "Use CUDA? (y/n): "),
Backend("CUTLASS", "USE_CUTLASS", "Use CUTLASS? (y/n): "),
Backend("CUBLAS", "USE_CUBLAS", "Use CUBLAS? (y/n): "),
Backend("ROCm", "USE_ROCM", "Use ROCm? (y/n): "),
Backend("Vulkan", "USE_VULKAN", "Use Vulkan? (y/n): "),
cuda_backend,
Backend("CUTLASS", "USE_CUTLASS", "Use CUTLASS? (y/n): ", cuda_backend),
Backend("CUBLAS", "USE_CUBLAS", "Use CUBLAS? (y/n): ", cuda_backend),
Backend("ROCm", "USE_ROCM", "Use ROCm? (y/n): ", None),
Backend("Vulkan", "USE_VULKAN", "Use Vulkan? (y/n): ", None),
Backend("Metal", "USE_METAL", "Use Metal (Apple M1/M2 GPU) ? (y/n): ", None),
opencl_backend,
Backend(
"Metal",
"USE_METAL",
"Use Metal (Apple M1/M2 GPU) ? (y/n): ",
"OpenCLHostPtr",
"USE_OPENCL_ENABLE_HOST_PTR",
"Use OpenCLHostPtr? (y/n): ",
opencl_backend,
),
Backend(
"OpenCL",
"USE_OPENCL",
"Use OpenCL? (y/n) ",
),
Backend("OpenCLHostPtr", "USE_OPENCL_ENABLE_HOST_PTR", "Use OpenCLHostPtr? (y/n): "),
]

enabled_backends = set()

for backend in backends:
while True:
use_backend = input(backend.prompt_str)
if use_backend in ["yes", "Y", "y"]:
cmake_config_str += f"set({backend.cmake_config_name} ON)\n"
enabled_backends.add(backend.name)
break
elif use_backend in ["no", "N", "n"]:
cmake_config_str += f"set({backend.cmake_config_name} OFF)\n"
break
else:
print(f"Invalid input: {use_backend}. Please input again.")
if backend.parent is not None and backend.parent.name not in enabled_backends:
cmake_config_str += f"set({backend.cmake_config_name} OFF)\n"
else:
while True:
use_backend = input(backend.prompt_str)
if use_backend in ["yes", "Y", "y"]:
cmake_config_str += f"set({backend.cmake_config_name} ON)\n"
enabled_backends.add(backend.name)
break
elif use_backend in ["no", "N", "n"]:
cmake_config_str += f"set({backend.cmake_config_name} OFF)\n"
break
else:
print(f"Invalid input: {use_backend}. Please input again.")

if "CUDA" in enabled_backends:
cmake_config_str += f"set(USE_THRUST ON)\n"

# FlashInfer related
use_flashInfer = False # pylint: disable=invalid-name
while True:
user_input = input("Use FlashInfer? (need CUDA w/ compute capability 80;86;89;90) (y/n): ")
if user_input in ["yes", "Y", "y"]:
cmake_config_str += "set(USE_FLASHINFER ON)\n"
cmake_config_str += "set(FLASHINFER_ENABLE_FP8 OFF)\n"
cmake_config_str += "set(FLASHINFER_ENABLE_BF16 OFF)\n"
cmake_config_str += "set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)\n"
cmake_config_str += "set(FLASHINFER_GEN_PAGE_SIZES 16)\n"
cmake_config_str += "set(FLASHINFER_GEN_HEAD_DIMS 128)\n"
cmake_config_str += "set(FLASHINFER_GEN_KV_LAYOUTS 0 1)\n"
cmake_config_str += "set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)\n"
cmake_config_str += 'set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")\n'
cmake_config_str += 'set(FLASHINFER_GEN_CASUALS "false" "true")\n'
use_flashInfer = True # pylint: disable=invalid-name
break
elif user_input in ["no", "N", "n"]:
cmake_config_str += "set(USE_FLASHINFER OFF)\n"
break
else:
print(f"Invalid input: {use_flashInfer}. Please input again.")
if "CUDA" in enabled_backends:
while True:
user_input = input(
"Use FlashInfer? (need CUDA w/ compute capability 80;86;89;90) (y/n): "
)
if user_input in ["yes", "Y", "y"]:
cmake_config_str += "set(USE_FLASHINFER ON)\n"
cmake_config_str += "set(FLASHINFER_ENABLE_FP8 OFF)\n"
cmake_config_str += "set(FLASHINFER_ENABLE_BF16 OFF)\n"
cmake_config_str += "set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)\n"
cmake_config_str += "set(FLASHINFER_GEN_PAGE_SIZES 16)\n"
cmake_config_str += "set(FLASHINFER_GEN_HEAD_DIMS 128)\n"
cmake_config_str += "set(FLASHINFER_GEN_KV_LAYOUTS 0 1)\n"
cmake_config_str += "set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)\n"
cmake_config_str += 'set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")\n'
cmake_config_str += 'set(FLASHINFER_GEN_CASUALS "false" "true")\n'
use_flashInfer = True # pylint: disable=invalid-name
break
elif user_input in ["no", "N", "n"]:
cmake_config_str += "set(USE_FLASHINFER OFF)\n"
break
else:
print(f"Invalid input: {use_flashInfer}. Please input again.")
else:
cmake_config_str += "set(USE_FLASHINFER OFF)\n"

if use_flashInfer:
while True:
user_input = input("Enter your CUDA compute capability: ")
Expand Down
175 changes: 0 additions & 175 deletions cpp/grammar/grammar.cc

This file was deleted.

Loading
Loading