Skip to content

Commit

Permalink
musa: remove Clang builtins mapping (#9421)
Browse files Browse the repository at this point in the history
Signed-off-by: Xiaodong Ye <[email protected]>
  • Loading branch information
yeahdongcn authored Sep 11, 2024
1 parent 51b6038 commit b34e023
Showing 1 changed file with 0 additions and 39 deletions.
39 changes: 0 additions & 39 deletions ggml/src/ggml-cuda/vendors/musa.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,42 +130,3 @@
#define cudaKernelNodeParams musaKernelNodeParams
#define cudaStreamCaptureModeRelaxed musaStreamCaptureModeRelaxed
#define cudaStreamEndCapture musaStreamEndCapture

// XXX: Clang builtins mapping
#define __vsub4 __vsub4_musa
#define __vcmpeq4 __vcmpeq4_musa
#define __vcmpne4 __vcmpne4_musa

#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));

static __device__ __forceinline__ int __vsub4_musa(const int a, const int b) {
return __vsubss4(a, b);
}

static __device__ __forceinline__ unsigned int __vcmpeq4_musa(unsigned int a, unsigned int b) {
const uint8x4_t& va = reinterpret_cast<const uint8x4_t&>(a);
const uint8x4_t& vb = reinterpret_cast<const uint8x4_t&>(b);
unsigned int c;
uint8x4_t& vc = reinterpret_cast<uint8x4_t&>(c);
#pragma unroll
for (int i = 0; i < 4; ++i) {
vc[i] = va[i] == vb[i] ? 0xff : 0x00;
}
return c;
}

static __device__ __forceinline__ unsigned int __vcmpne4_musa(unsigned int a, unsigned int b) {
const uint8x4_t& va = reinterpret_cast<const uint8x4_t&>(a);
const uint8x4_t& vb = reinterpret_cast<const uint8x4_t&>(b);
unsigned int c;
uint8x4_t& vc = reinterpret_cast<uint8x4_t&>(c);
#pragma unroll
for (int i = 0; i < 4; ++i) {
vc[i] = va[i] == vb[i] ? 0x00 : 0xff;
}
return c;
}

0 comments on commit b34e023

Please sign in to comment.