Skip to content

Commit

Permalink
feat(hardware): 实现 mlu 硬件相关的函数;改变编译方式按照以硬件名称命名的目录名区分是否需要编译
Browse files Browse the repository at this point in the history
  • Loading branch information
kilinchange committed Jan 4, 2024
1 parent 535134b commit bd6ad6b
Show file tree
Hide file tree
Showing 13 changed files with 167 additions and 23 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
option(USE_CUDA "Support Nvidia GPU" OFF)
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
option(USE_BANG "Support Hanwuji MLU" OFF)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down Expand Up @@ -41,6 +42,10 @@ if(USE_KUNLUN)
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
endif()

# When the USE_BANG option is ON, define the USE_BANG preprocessor symbol
# for targets created after this point.
if (USE_BANG)
add_compile_definitions(USE_BANG)
endif()

add_compile_options(-march=native) # this will cause error in some machine
add_compile_options(-mtune=native)
add_compile_options(-Wall)
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
TYPE ?= Debug
CUDA ?= OFF
KUNLUN ?= OFF
BANG ?= OFF

CMAKE_EXTRA =
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=
Expand Down
12 changes: 9 additions & 3 deletions src/02hardware/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(hardware VERSION 0.0.0 LANGUAGES CXX)
message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})

# Source files
file(GLOB HARDWARE_SRC src/*.cc src/*.cpp src/devices/cpu/*.cc)

if(USE_CUDA)
file(GLOB_RECURSE HARDWARE_CUDA_SRC src/*.cu)
file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu src/devices/nvidia/*.cc)
endif()

# Collect the MLU backend sources only when USE_BANG is enabled.
# The recursive glob picks up every .cc file under src/devices/mlu, so each
# of those files is compiled as its own translation unit.
if(USE_BANG)
file(GLOB_RECURSE HARDWARE_BANG_SRC src/devices/mlu/*.cc)
endif()

file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp)
add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC})
add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC})
target_link_libraries(hardware PUBLIC common)
target_include_directories(hardware PUBLIC include)

Expand Down
2 changes: 2 additions & 0 deletions src/02hardware/include/hardware/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace refactor::hardware {
// Tag identifying which hardware backend a device belongs to.
// Enumerators carry no explicit values, so they are numbered in declaration
// order; inserting a new one anywhere but the end renumbers those after it.
enum class Type : int32_t {
Cpu,
Nvidia,
Mlu,// returned by hardware::Mlu::type()
Kunlun,
};

protected:
Expand Down
19 changes: 19 additions & 0 deletions src/02hardware/include/hardware/devices/mlu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef HARDWARE_DEVICES_MLU_H
#define HARDWARE_DEVICES_MLU_H

#include "../device.h"

namespace refactor::hardware {

/// Device subclass for an MLU card; always reports `Type::Mlu`.
class Mlu final : public Device {
public:
    /// Constructs the device for the card with index `card`.
    explicit Mlu(int32_t card);

    /// Backend tag of this device.
    Type type() const noexcept final { return Type::Mlu; }

    /// Defined out of line in the MLU device source file.
    void setContext() const noexcept final;
};

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_H
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
namespace refactor::hardware {
using M = CpuMemory;

void *M::malloc(size_t size) noexcept {
void *M::malloc(size_t size) {
return std::malloc(size);
}
void M::free(void *ptr) noexcept {
void M::free(void *ptr) {
std::free(ptr);
}
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}

Expand Down
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
namespace refactor::hardware {

class CpuMemory final : public Memory {
void *malloc(size_t) noexcept final;
void free(void *) noexcept final;
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
void *malloc(size_t) final;
void free(void *) final;
void *copyHD(void *dst, void const *src, size_t bytes) const final;
void *copyDH(void *dst, void const *src, size_t bytes) const final;
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware
Expand Down
27 changes: 27 additions & 0 deletions src/02hardware/src/devices/mlu/device.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "functions.hh"
#include "hardware/devices/mlu.h"
#include "hardware/mem_pool.h"
#include "memory.hh"

// NOTE: this file includes the header "functions.hh", not "functions.cc".
// The build globs every .cc under src/devices/mlu, so including the source
// file here would define getDeviceCount/setDevice/getMemInfo twice.

namespace refactor::hardware {

    // Builds the memory pool that backs MLU card `card`.
    //
    // Checks that `card` is in [0, getDeviceCount()), selects the card, then
    // sizes the pool as min(free, max(5 GiB, 4/5 of total)) using the values
    // reported by getMemInfo().
    static Arc<Memory> bangMemory(int32_t card) {
        ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
        setDevice(card);
        auto [free, total] = getMemInfo();
        auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
        fmt::println("initializing MLU {}, memory {} / {}, alloc {}",
                     card, free, total, size);
        return std::make_shared<MemPool>(
            std::make_shared<MluMemory>(),
            size,
            256ul);// presumably the pool's alignment in bytes; confirm against MemPool
    }

    // Constructs the device together with its memory pool for card `card`.
    Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}

    // Selects this device's card via setDevice.
    // NOTE(review): setDevice goes through BANG_ASSERT while this function is
    // noexcept; confirm how a CNRT failure is expected to surface here.
    void Mlu::setContext() const noexcept {
        setDevice(_card);
    }

}// namespace refactor::hardware
19 changes: 19 additions & 0 deletions src/02hardware/src/devices/mlu/functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "functions.hh"

namespace refactor::hardware {

// Returns the number of devices reported by cnrtGetDeviceCount.
// The CNRT call writes through an `unsigned int *`, so the count is received
// in an unsigned local and converted for this int-returning interface.
// A non-success CNRT status is reported through BANG_ASSERT.
int getDeviceCount() {
    unsigned int deviceCount = 0;
    BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
    return static_cast<int>(deviceCount);
}
// Selects `device` through cnrtSetDevice.
// A non-success CNRT status is reported through BANG_ASSERT.
void setDevice(int device) {
BANG_ASSERT(cnrtSetDevice(device));
}
// Queries the free and total memory figures through CNRT.
// Uses cnrtMemGetInfo: this is the MLU backend, where the CUDA runtime call
// is neither declared nor meaningful.
// A non-success CNRT status is reported through BANG_ASSERT.
MemInfo getMemInfo() {
    MemInfo memInfo{};
    BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
    return memInfo;
}

}// namespace refactor::hardware
24 changes: 24 additions & 0 deletions src/02hardware/src/devices/mlu/functions.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_HH
#define HARDWARE_DEVICES_MLU_FUNCTIONS_HH

#include "common.h"
#include <cnrt.h>// CNRT_RET_SUCCESS, cnrtGetErrorStr and the cnrt* calls wrapped by BANG_ASSERT
#include <cstddef>

// Evaluates a CNRT call once and, if the returned status is not
// CNRT_RET_SUCCESS, raises RUNTIME_ERROR with a message containing the
// stringified call expression, the CNRT error string and the numeric status.
//
// The macro expands to a bare `if (...) { ... }` statement: do not use it as
// the unbraced body of another if/else, where it would capture the `else`.
#define BANG_ASSERT(STATUS)                                                          \
    if (auto status = (STATUS); status != CNRT_RET_SUCCESS) {                        \
        RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
                                  cnrtGetErrorStr(status), (int) status));           \
    }

namespace refactor::hardware {

    // Free and total memory figures as returned by getMemInfo().
    struct MemInfo {
        size_t free, total;
    };

    // Number of devices reported by CNRT.
    int getDeviceCount();
    // Selects `device` through CNRT.
    void setDevice(int device);
    // Free/total memory figures obtained through CNRT.
    MemInfo getMemInfo();

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_HH
31 changes: 31 additions & 0 deletions src/02hardware/src/devices/mlu/memory.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "memory.hh"
#include "functions.hh"

namespace refactor::hardware {
using M = MluMemory;

// Allocates `size` bytes through cnrtMalloc and returns the pointer it wrote.
// The local starts as nullptr so it never holds an indeterminate value.
// A non-success CNRT status is reported through BANG_ASSERT.
void *M::malloc(size_t size) {
    void *ptr = nullptr;
    BANG_ASSERT(cnrtMalloc(&ptr, size));
    return ptr;
}
// Releases `ptr` through cnrtFree.
// A non-success CNRT status is reported through BANG_ASSERT.
void M::free(void *ptr) {
BANG_ASSERT(cnrtFree(ptr));
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy in the
// HOST2DEV direction and returns `dst`.
// `src` is const_cast because the call is given a non-const source pointer.
// A non-success CNRT status is reported through BANG_ASSERT.
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
    BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
                           CNRT_MEM_TRANS_DIR_HOST2DEV));
    return dst;
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy in the
// DEV2HOST direction and returns `dst`.
// `src` is const_cast because the call is given a non-const source pointer.
// A non-success CNRT status is reported through BANG_ASSERT.
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_DEV2HOST));
return dst;
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy in the
// PEER2PEER direction and returns `dst`.
// A non-success CNRT status is reported through BANG_ASSERT.
// NOTE(review): if both pointers always live on the same card, confirm
// whether CNRT_MEM_TRANS_DIR_DEV2DEV is the intended direction here.
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_PEER2PEER));
return dst;
}

}// namespace refactor::hardware
18 changes: 18 additions & 0 deletions src/02hardware/src/devices/mlu/memory.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef HARDWARE_DEVICES_MLU_MEMORY_HH
#define HARDWARE_DEVICES_MLU_MEMORY_HH

#include "hardware/memory.h"

namespace refactor::hardware {

    /// `Memory` implementation for the MLU backend.
    ///
    /// All overrides are private (class-default access), matching the
    /// declaration style of `CpuMemory`.
    class MluMemory final : public Memory {
        /// Allocates the given number of bytes.
        void *malloc(size_t) final;
        /// Releases a pointer obtained from `malloc`.
        void free(void *) final;
        /// Host-to-device copy of `bytes` bytes; returns `dst`.
        void *copyHD(void *dst, void const *src, size_t bytes) const final;
        /// Device-to-host copy of `bytes` bytes; returns `dst`.
        void *copyDH(void *dst, void const *src, size_t bytes) const final;
        /// Device-to-device copy of `bytes` bytes; returns `dst`.
        void *copyDD(void *dst, void const *src, size_t bytes) const final;
    };

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_MEMORY_HH
12 changes: 2 additions & 10 deletions src/02hardware/src/devices/nvidia/device.cc
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
#include "hardware/devices/nvidia.h"
#include "functions.cuh"
#include "hardware/devices/nvidia.h"
#include "hardware/mem_pool.h"
#ifdef USE_CUDA
#include "functions.cuh"
#include "memory.cuh"
#endif

namespace refactor::hardware {

static Arc<Memory> cudaMemory(int32_t card) {
#ifdef USE_CUDA
ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
setDevice(card);
auto [free, total] = getMemInfo();
Expand All @@ -19,17 +16,12 @@ namespace refactor::hardware {
std::make_shared<NvidiaMemory>(),
size,
256ul);
#else
RUNTIME_ERROR("CUDA is not enabled");
#endif
}

Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}

void Nvidia::setContext() const noexcept {
#ifdef USE_CUDA
setDevice(_card);
#endif
}

}// namespace refactor::hardware

0 comments on commit bd6ad6b

Please sign in to comment.