From bd6ad6b8a903633a39de7dbb669f4ae17784dcb3 Mon Sep 17 00:00:00 2001
From: kilinchange
Date: Wed, 3 Jan 2024 17:30:03 +0800
Subject: [PATCH] =?UTF-8?q?feat(hardware):=20=E5=AE=9E=E7=8E=B0=20mlu=20?=
 =?UTF-8?q?=E7=A1=AC=E4=BB=B6=E7=9B=B8=E5=85=B3=E7=9A=84=E5=87=BD=E6=95=B0?=
 =?UTF-8?q?=EF=BC=9B=E6=94=B9=E5=8F=98=E7=BC=96=E8=AF=91=E6=96=B9=E5=BC=8F?=
 =?UTF-8?q?=E6=8C=89=E7=85=A7=E4=BB=A5=E7=A1=AC=E4=BB=B6=E5=90=8D=E7=A7=B0?=
 =?UTF-8?q?=E5=91=BD=E5=90=8D=E7=9A=84=E7=9B=AE=E5=BD=95=E5=90=8D=E5=8C=BA?=
 =?UTF-8?q?=E5=88=86=E6=98=AF=E5=90=A6=E9=9C=80=E8=A6=81=E7=BC=96=E8=AF=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                                |  5 +++
 Makefile                                      |  1 +
 src/02hardware/CMakeLists.txt                 | 12 +++++++--
 src/02hardware/include/hardware/device.h      |  2 ++
 src/02hardware/include/hardware/devices/mlu.h | 19 ++++++++++++
 src/02hardware/src/devices/cpu/memory.cc      | 10 +++---
 src/02hardware/src/devices/cpu/memory.hh      | 10 +++---
 src/02hardware/src/devices/mlu/device.cc      | 27 ++++++++++++++++
 src/02hardware/src/devices/mlu/functions.cc   | 19 ++++++++++++
 src/02hardware/src/devices/mlu/functions.hh   | 24 ++++++++++++++
 src/02hardware/src/devices/mlu/memory.cc      | 31 +++++++++++++++++++
 src/02hardware/src/devices/mlu/memory.hh      | 18 +++++++++++
 src/02hardware/src/devices/nvidia/device.cc   | 12 ++-----
 13 files changed, 167 insertions(+), 23 deletions(-)
 create mode 100644 src/02hardware/include/hardware/devices/mlu.h
 create mode 100644 src/02hardware/src/devices/mlu/device.cc
 create mode 100644 src/02hardware/src/devices/mlu/functions.cc
 create mode 100644 src/02hardware/src/devices/mlu/functions.hh
 create mode 100644 src/02hardware/src/devices/mlu/memory.cc
 create mode 100644 src/02hardware/src/devices/mlu/memory.hh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5521ed552..45116732d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
 option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
 option(USE_CUDA "Support Nvidia GPU" OFF)
 option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
+option(USE_BANG "Support Hanwuji MLU" OFF)
 
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -41,6 +42,10 @@ if(USE_KUNLUN)
     message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
 endif()
 
+if (USE_BANG)
+    add_compile_definitions(USE_BANG)
+endif()
+
 add_compile_options(-march=native) # this will cause error in some machine
 add_compile_options(-mtune=native)
 add_compile_options(-Wall)
diff --git a/Makefile b/Makefile
index cf3e402ce..571515f5b 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@
 TYPE ?= Debug
 CUDA ?= OFF
 KUNLUN ?= OFF
+BANG ?= OFF
 
 CMAKE_EXTRA =
 # CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=
diff --git a/src/02hardware/CMakeLists.txt b/src/02hardware/CMakeLists.txt
index ece758395..1e38c5e2e 100644
--- a/src/02hardware/CMakeLists.txt
+++ b/src/02hardware/CMakeLists.txt
@@ -2,12 +2,18 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 project(hardware VERSION 0.0.0 LANGUAGES CXX)
 message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
 
+# Source files
+file(GLOB HARDWARE_SRC src/*.cc src/*.cpp src/devices/cpu/*.cc)
+
 if(USE_CUDA)
-    file(GLOB_RECURSE HARDWARE_CUDA_SRC src/*.cu)
+    file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu src/devices/nvidia/*.cc)
+endif()
+
+if(USE_BANG)
+    file(GLOB_RECURSE HARDWARE_BANG_SRC src/devices/mlu/*.cc)
 endif()
 
-file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp)
-add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC})
+add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC})
 target_link_libraries(hardware PUBLIC common)
 target_include_directories(hardware PUBLIC include)
 
diff --git a/src/02hardware/include/hardware/device.h b/src/02hardware/include/hardware/device.h
index 5809fcf94..cb65a2730 100644
--- a/src/02hardware/include/hardware/device.h
+++ b/src/02hardware/include/hardware/device.h
@@ -11,6 +11,8 @@ namespace refactor::hardware {
         enum class Type : int32_t {
             Cpu,
             Nvidia,
+            Mlu,
+            Kunlun,
         };
 
     protected:
diff --git a/src/02hardware/include/hardware/devices/mlu.h b/src/02hardware/include/hardware/devices/mlu.h
new file mode 100644
index 000000000..aa3b76529
--- /dev/null
+++ b/src/02hardware/include/hardware/devices/mlu.h
@@ -0,0 +1,19 @@
+#ifndef HARDWARE_DEVICES_MLU_H
+#define HARDWARE_DEVICES_MLU_H
+
+#include "../device.h"
+
+namespace refactor::hardware {
+    /// Device bound to a single MLU card; reports Type::Mlu from type().
+    class Mlu final : public Device {
+    public:
+        explicit Mlu(int32_t card);
+        void setContext() const noexcept final;
+        Type type() const noexcept final {
+            return Type::Mlu;
+        }
+    };
+
+}// namespace refactor::hardware
+
+#endif// HARDWARE_DEVICES_MLU_H
diff --git a/src/02hardware/src/devices/cpu/memory.cc b/src/02hardware/src/devices/cpu/memory.cc
index 4db56e98e..dbd64f51e 100644
--- a/src/02hardware/src/devices/cpu/memory.cc
+++ b/src/02hardware/src/devices/cpu/memory.cc
@@ -5,19 +5,19 @@
 namespace refactor::hardware {
     using M = CpuMemory;
 
-    void *M::malloc(size_t size) noexcept {
+    void *M::malloc(size_t size) {
         return std::malloc(size);
     }
-    void M::free(void *ptr) noexcept {
+    void M::free(void *ptr) {
         std::free(ptr);
     }
-    void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
+    void *M::copyHD(void *dst, void const *src, size_t bytes) const {
         return std::memcpy(dst, src, bytes);
     }
-    void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
+    void *M::copyDH(void *dst, void const *src, size_t bytes) const {
         return std::memcpy(dst, src, bytes);
     }
-    void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
+    void *M::copyDD(void *dst, void const *src, size_t bytes) const {
         return std::memcpy(dst, src, bytes);
     }
 
diff --git a/src/02hardware/src/devices/cpu/memory.hh b/src/02hardware/src/devices/cpu/memory.hh
index 5bd3a1dda..d1681b24d 100644
--- a/src/02hardware/src/devices/cpu/memory.hh
+++ b/src/02hardware/src/devices/cpu/memory.hh
@@ -6,11 +6,11 @@
 namespace refactor::hardware {
 
     class CpuMemory final : public Memory {
-        void *malloc(size_t) noexcept final;
-        void free(void *) noexcept final;
-        void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
-        void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
-        void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
+        void *malloc(size_t) final;
+        void free(void *) final;
+        void *copyHD(void *dst, void const *src, size_t bytes) const final;
+        void *copyDH(void *dst, void const *src, size_t bytes) const final;
+        void *copyDD(void *dst, void const *src, size_t bytes) const final;
     };
 
 }// namespace refactor::hardware
diff --git a/src/02hardware/src/devices/mlu/device.cc b/src/02hardware/src/devices/mlu/device.cc
new file mode 100644
index 000000000..87b6150db
--- /dev/null
+++ b/src/02hardware/src/devices/mlu/device.cc
@@ -0,0 +1,27 @@
+#include "functions.hh"
+#include "hardware/devices/mlu.h"
+#include "hardware/mem_pool.h"
+#include "memory.hh"
+
+namespace refactor::hardware {
+    /// Checks `card`, selects it, and wraps MluMemory in a pool of min(free, max(5ul << 30, 4/5 of total)).
+    static Arc<Memory> bangMemory(int32_t card) {
+        ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
+        setDevice(card);
+        auto [free, total] = getMemInfo();
+        auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
+        fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
+                     card, free, total, size);
+        return std::make_shared<MemPool>(
+            std::make_shared<MluMemory>(),
+            size,
+            256ul);
+    }
+
+    Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}
+    /// Re-selects this device's card through setDevice().
+    void Mlu::setContext() const noexcept {
+        setDevice(_card);
+    }
+
+}// namespace refactor::hardware
diff --git a/src/02hardware/src/devices/mlu/functions.cc b/src/02hardware/src/devices/mlu/functions.cc
new file mode 100644
index 000000000..28918bcec
--- /dev/null
+++ b/src/02hardware/src/devices/mlu/functions.cc
@@ -0,0 +1,19 @@
+#include "functions.hh"
+
+namespace refactor::hardware {
+    // Each wrapper below raises through BANG_ASSERT when the CNRT call does not succeed.
+    int getDeviceCount() {
+        unsigned int deviceCount = 0;// cnrtGetDeviceCount writes an unsigned int
+        BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
+        return static_cast<int>(deviceCount);
+    }
+    void setDevice(int device) {
+        BANG_ASSERT(cnrtSetDevice(device));
+    }
+    MemInfo getMemInfo() {
+        MemInfo memInfo;
+        BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
+        return memInfo;
+    }
+
+}// namespace refactor::hardware
diff --git a/src/02hardware/src/devices/mlu/functions.hh b/src/02hardware/src/devices/mlu/functions.hh
new file mode 100644
index 000000000..cb5a561c2
--- /dev/null
+++ b/src/02hardware/src/devices/mlu/functions.hh
@@ -0,0 +1,24 @@
+#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_HH
+#define HARDWARE_DEVICES_MLU_FUNCTIONS_HH
+
+#include "common.h"
+// Evaluates STATUS once; raises RUNTIME_ERROR with the CNRT error text unless it equals CNRT_RET_SUCCESS.
+#define BANG_ASSERT(STATUS)                                                          \
+    if (auto status = (STATUS); status != CNRT_RET_SUCCESS) {                        \
+        RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
+                                  cnrtGetErrorStr(status), (int) status));           \
+    }
+
+namespace refactor::hardware {
+    /// Free / total memory figures as returned by getMemInfo().
+    struct MemInfo {
+        size_t free, total;
+    };
+    /// Wrappers over the CNRT runtime, defined in functions.cc.
+    int getDeviceCount();
+    void setDevice(int device);
+    MemInfo getMemInfo();
+
+}// namespace refactor::hardware
+
+#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_HH
diff --git a/src/02hardware/src/devices/mlu/memory.cc b/src/02hardware/src/devices/mlu/memory.cc
new file mode 100644
index 000000000..81b3c626a
--- /dev/null
+++ b/src/02hardware/src/devices/mlu/memory.cc
@@ -0,0 +1,31 @@
+#include "memory.hh"
+#include "functions.hh"
+
+namespace refactor::hardware {
+    using M = MluMemory;
+    // cnrtMemcpy is passed a non-const source pointer, hence the const_cast in the copy routines.
+    void *M::malloc(size_t size) {
+        void *ptr = nullptr;
+        BANG_ASSERT(cnrtMalloc(&ptr, size));
+        return ptr;
+    }
+    void M::free(void *ptr) {
+        BANG_ASSERT(cnrtFree(ptr));
+    }
+    void *M::copyHD(void *dst, void const *src, size_t bytes) const {
+        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
+                               CNRT_MEM_TRANS_DIR_HOST2DEV));
+        return dst;
+    }
+    void *M::copyDH(void *dst, void const *src, size_t bytes) const {
+        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
+                               CNRT_MEM_TRANS_DIR_DEV2HOST));
+        return dst;
+    }
+    void *M::copyDD(void *dst, void const *src, size_t bytes) const {
+        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
+                               CNRT_MEM_TRANS_DIR_PEER2PEER));
+        return dst;
+    }
+
+}// namespace refactor::hardware
diff --git a/src/02hardware/src/devices/mlu/memory.hh b/src/02hardware/src/devices/mlu/memory.hh
new file mode 100644
index 000000000..85ec39887
--- /dev/null
+++ b/src/02hardware/src/devices/mlu/memory.hh
@@ -0,0 +1,18 @@
+#ifndef HARDWARE_DEVICES_MLU_MEMORY_HH
+#define HARDWARE_DEVICES_MLU_MEMORY_HH
+
+#include "hardware/memory.h"
+
+namespace refactor::hardware {
+    /// Memory implementation whose allocation and copies go through CNRT (see memory.cc).
+    class MluMemory final : public Memory {
+        void *malloc(size_t) final;
+        void free(void *) final;
+        void *copyHD(void *dst, void const *src, size_t bytes) const final;
+        void *copyDH(void *dst, void const *src, size_t bytes) const final;
+        void *copyDD(void *dst, void const *src, size_t bytes) const final;
+    };
+
+}// namespace refactor::hardware
+
+#endif// HARDWARE_DEVICES_MLU_MEMORY_HH
diff --git a/src/02hardware/src/devices/nvidia/device.cc b/src/02hardware/src/devices/nvidia/device.cc
index 1ae5b2244..403921cba 100644
--- a/src/02hardware/src/devices/nvidia/device.cc
+++ b/src/02hardware/src/devices/nvidia/device.cc
@@ -1,14 +1,11 @@
-#include "hardware/devices/nvidia.h"
+#include "functions.cuh"
+#include "hardware/devices/nvidia.h"
 #include "hardware/mem_pool.h"
-#ifdef USE_CUDA
-#include "functions.cuh"
 #include "memory.cuh"
-#endif
 
 namespace refactor::hardware {
 
     static Arc<Memory> cudaMemory(int32_t card) {
-#ifdef USE_CUDA
         ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
         setDevice(card);
         auto [free, total] = getMemInfo();
@@ -19,17 +16,12 @@ namespace refactor::hardware {
             std::make_shared<NvidiaMemory>(),
             size,
             256ul);
-#else
-        RUNTIME_ERROR("CUDA is not enabled");
-#endif
     }
 
     Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}
 
     void Nvidia::setContext() const noexcept {
-#ifdef USE_CUDA
         setDevice(_card);
-#endif
     }
 
 }// namespace refactor::hardware