Skip to content

Commit

Permalink
[AIE2P] Add fifo store intrinsics - IR lowering
Browse files Browse the repository at this point in the history
  • Loading branch information
khallouh committed Jan 24, 2025
1 parent af6f54b commit e30d6ef
Show file tree
Hide file tree
Showing 5 changed files with 4,527 additions and 8 deletions.
14 changes: 13 additions & 1 deletion clang/include/clang/Basic/BuiltinsAIE2P.def
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -323,3 +323,15 @@ BUILTIN(__builtin_aie2p_divstep, "vUi&Ui&Ui", "nc")

// SHUFFLE
BUILTIN(__builtin_aie2p_vshuffle_576_bfp16, "vV64cV8cV64cV8ciV64c&V8c&", "nc")
// Unaligned and bfp16 stores
//
// FIFO store builtins. All of them return void ("v") and take, by reference,
// the store pointer ("i*&"), the FIFO contents ("V32i&") and an int position
// ("i&"); the lowering in CGBuiltin.cpp stores the intrinsic's results back
// through these three references.
//
// push_*: the value to store sits between the pointer and the FIFO. The
// 576/544 variants take it as two vectors (V64c, V8c), the 512 variant as a
// single V16i.
BUILTIN(__builtin_aie2p_fifo_st_push_576_bfp16, "vi*&V64cV8cV32i&i&", "nc")
BUILTIN(__builtin_aie2p_fifo_st_push_512_bfp16, "vi*&V16iV32i&i&", "nc")
BUILTIN(__builtin_aie2p_fifo_st_push_544_bfp16, "vi*&V64cV8cV32i&i&", "nc")
// flush*: the 1d/2d/3d "_byte" variants append int operands after the
// position. The extra "i&" operands (one for 2d, two for 3d) are counters that
// the lowering also writes back after the call.
BUILTIN(__builtin_aie2p_fifo_st_flush, "vi*&V32i&i&", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_1d_byte, "vi*&V32i&i&i", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_2d_byte, "vi*&V32i&i&iii&i", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_3d_byte, "vi*&V32i&i&iii&iii&i", "nc")
// flush_conv*: same signatures as the flush* builtins above, lowered to the
// "_conv" intrinsics.
BUILTIN(__builtin_aie2p_fifo_st_flush_conv, "vi*&V32i&i&", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_conv_1d_byte, "vi*&V32i&i&i", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_conv_2d_byte, "vi*&V32i&i&iii&i", "nc")
BUILTIN(__builtin_aie2p_fifo_st_flush_conv_3d_byte, "vi*&V32i&i&iii&iii&i", "nc")
153 changes: 151 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its
// Modifications (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its
// affiliates
//
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -64,6 +64,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
Expand Down Expand Up @@ -22056,6 +22057,28 @@ static llvm::Intrinsic::ID getAIE2PIntrinsicFunction(unsigned BuiltinID) {
return Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16;
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16:
return Intrinsic::aie2p_vshuffle_576_bfp16;
// FIFO store builtins. Push builtins map onto the intrinsic of the same name.
// For the flush builtins the "_byte" suffix is dropped, and the "conv"
// builtins select the intrinsic carrying the "_conv" suffix; the plain ones
// select the intrinsic without it.
case AIE::BI__builtin_aie2p_fifo_st_push_512_bfp16:
  return Intrinsic::aie2p_fifo_st_push_512_bfp16;
case AIE::BI__builtin_aie2p_fifo_st_push_544_bfp16:
  return Intrinsic::aie2p_fifo_st_push_544_bfp16;
case AIE::BI__builtin_aie2p_fifo_st_push_576_bfp16:
  return Intrinsic::aie2p_fifo_st_push_576_bfp16;
case AIE::BI__builtin_aie2p_fifo_st_flush:
  return Intrinsic::aie2p_fifo_st_flush;
case AIE::BI__builtin_aie2p_fifo_st_flush_conv:
  return Intrinsic::aie2p_fifo_st_flush_conv;
case AIE::BI__builtin_aie2p_fifo_st_flush_1d_byte:
  return Intrinsic::aie2p_fifo_st_flush_1d;
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_1d_byte:
  return Intrinsic::aie2p_fifo_st_flush_1d_conv;
case AIE::BI__builtin_aie2p_fifo_st_flush_2d_byte:
  return Intrinsic::aie2p_fifo_st_flush_2d;
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_2d_byte:
  return Intrinsic::aie2p_fifo_st_flush_2d_conv;
case AIE::BI__builtin_aie2p_fifo_st_flush_3d_byte:
  return Intrinsic::aie2p_fifo_st_flush_3d;
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_3d_byte:
  return Intrinsic::aie2p_fifo_st_flush_3d_conv;
default:
break;
}
Expand Down Expand Up @@ -22293,6 +22316,7 @@ Value *CodeGenFunction::EmitAIEBuiltinExpr(unsigned BuiltinID,
Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 2)));
Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
}

llvm::Intrinsic::ID IntrinsicID = getAIEIntrinsicFunction(BuiltinID, Arch);
assert(IntrinsicID != Intrinsic::not_intrinsic);
Function *F = CGM.getIntrinsic(IntrinsicID);
Expand Down Expand Up @@ -22331,6 +22355,120 @@ Value *CodeGenFunction::EmitAIEBuiltinExpr(unsigned BuiltinID,
EmitLValue(E->getArg(E->getNumArgs() - 1)).getPointer(*this);
return Builder.CreateDefaultAlignedStore(Exp, ExpAddr);
}
case AIE::BI__builtin_aie2p_fifo_st_push_512_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_push_544_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_push_576_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv:
case AIE::BI__builtin_aie2p_fifo_st_flush: {
  // FIFO store push and plain flush. Every builtin argument is forwarded
  // unchanged to the intrinsic. The intrinsic returns an aggregate whose
  // elements 0, 1 and 2 are stored back through the builtin's pointer, fifo
  // and position reference arguments respectively.
  SmallVector<Value *, 3> Ops;
  for (unsigned I = 0; I < E->getNumArgs(); I++)
    Ops.push_back(EmitScalarExpr(E->getArg(I)));

  llvm::Intrinsic::ID IntrinsicID = getAIEIntrinsicFunction(BuiltinID, Arch);
  assert(IntrinsicID != Intrinsic::not_intrinsic);
  Function *F = CGM.getIntrinsic(IntrinsicID);
  Value *Val = Builder.CreateCall(F, Ops);

  Value *Ptr = Builder.CreateExtractValue(Val, 0);
  Value *Fifo = Builder.CreateExtractValue(Val, 1);
  Value *Avail = Builder.CreateExtractValue(Val, 2);
  Value *PtrAddr = EmitLValue(E->getArg(0)).getPointer(*this);

  // The fifo reference argument follows the value operand(s), so its index
  // depends on the builtin: flush has no value operand, push_512 has one and
  // push_544/push_576 have two. The position argument comes right after it.
  unsigned FifoArgIdx = 0;
  if (BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush ||
      BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush_conv) {
    FifoArgIdx = 1;
  } else if (BuiltinID == AIE::BI__builtin_aie2p_fifo_st_push_512_bfp16) {
    FifoArgIdx = 2;
  } else {
    // Parenthesised so the message string attaches to the whole condition.
    assert((BuiltinID == AIE::BI__builtin_aie2p_fifo_st_push_576_bfp16 ||
            BuiltinID == AIE::BI__builtin_aie2p_fifo_st_push_544_bfp16) &&
           "Unexpected BuiltinID");
    FifoArgIdx = 3;
  }
  Value *FifoAddr = EmitLValue(E->getArg(FifoArgIdx)).getPointer(*this);
  Value *AvailAddr = EmitLValue(E->getArg(FifoArgIdx + 1)).getPointer(*this);

  Builder.CreateDefaultAlignedStore(Fifo, FifoAddr);
  Builder.CreateDefaultAlignedStore(Avail, AvailAddr);
  return Builder.CreateDefaultAlignedStore(Ptr, PtrAddr);
}
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_1d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_1d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_2d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_2d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_3d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_3d_byte: {
  // FIFO store flush with 1d/2d/3d address increments. The leading arguments
  // (pointer, fifo, position) are forwarded as-is; the trailing NumAddrIncs
  // arguments are truncated to i20 before being passed to the intrinsic.
  // Elements 0..2 of the returned aggregate are stored back to the pointer,
  // fifo and position arguments; elements 3 (2d/3d) and 4 (3d) are
  // zero-extended to i32 and stored back to the counter arguments.
  SmallVector<Value *, 3> Ops;
  unsigned NumAddrIncs = 0;
  switch (BuiltinID) {
  case AIE::BI__builtin_aie2p_fifo_st_flush_1d_byte:
  case AIE::BI__builtin_aie2p_fifo_st_flush_conv_1d_byte:
    NumAddrIncs = 1;
    break;
  case AIE::BI__builtin_aie2p_fifo_st_flush_2d_byte:
  case AIE::BI__builtin_aie2p_fifo_st_flush_conv_2d_byte:
    NumAddrIncs = 4;
    break;
  case AIE::BI__builtin_aie2p_fifo_st_flush_3d_byte:
  case AIE::BI__builtin_aie2p_fifo_st_flush_conv_3d_byte:
    NumAddrIncs = 7;
    break;
  default:
    llvm_unreachable("Unexpected BuiltinID");
  }

  const unsigned NumArgs = E->getNumArgs();
  const unsigned FirstAddrInc = NumArgs - NumAddrIncs;
  for (unsigned I = 0; I < FirstAddrInc; I++)
    Ops.push_back(EmitScalarExpr(E->getArg(I)));

  llvm::Type *I20Ty = llvm::Type::getInt20Ty(getLLVMContext());
  for (unsigned I = FirstAddrInc; I != NumArgs; I++)
    Ops.push_back(Builder.CreateTrunc(EmitScalarExpr(E->getArg(I)), I20Ty));

  llvm::Intrinsic::ID IntrinsicID = getAIEIntrinsicFunction(BuiltinID, Arch);
  assert(IntrinsicID != Intrinsic::not_intrinsic);
  Function *F = CGM.getIntrinsic(IntrinsicID);
  Value *Val = Builder.CreateCall(F, Ops);

  Value *Ptr = Builder.CreateExtractValue(Val, 0);
  Value *Fifo = Builder.CreateExtractValue(Val, 1);
  Value *Avail = Builder.CreateExtractValue(Val, 2);
  Value *PtrAddr = EmitLValue(E->getArg(0)).getPointer(*this);
  Value *FifoAddr = EmitLValue(E->getArg(1)).getPointer(*this);
  Value *AvailAddr = EmitLValue(E->getArg(2)).getPointer(*this);

  llvm::Type *I32Ty = llvm::Type::getInt32Ty(getLLVMContext());
  if (BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush_2d_byte ||
      BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush_conv_2d_byte) {
    // 2d: a single counter, passed as argument 5.
    Value *Count1 =
        Builder.CreateZExt(Builder.CreateExtractValue(Val, 3), I32Ty);
    Value *Count1Addr = EmitLValue(E->getArg(5)).getPointer(*this);
    Builder.CreateDefaultAlignedStore(Count1, Count1Addr);
  } else if (BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush_3d_byte ||
             BuiltinID == AIE::BI__builtin_aie2p_fifo_st_flush_conv_3d_byte) {
    // 3d: two counters, located 5 and 2 positions before the end of the
    // argument list. Each counter address is emitted exactly once.
    Value *Count1 =
        Builder.CreateZExt(Builder.CreateExtractValue(Val, 3), I32Ty);
    Value *Count1Addr = EmitLValue(E->getArg(NumArgs - 5)).getPointer(*this);
    Value *Count2 =
        Builder.CreateZExt(Builder.CreateExtractValue(Val, 4), I32Ty);
    Value *Count2Addr = EmitLValue(E->getArg(NumArgs - 2)).getPointer(*this);
    Builder.CreateDefaultAlignedStore(Count1, Count1Addr);
    Builder.CreateDefaultAlignedStore(Count2, Count2Addr);
  }

  Builder.CreateDefaultAlignedStore(Fifo, FifoAddr);
  Builder.CreateDefaultAlignedStore(Avail, AvailAddr);
  return Builder.CreateDefaultAlignedStore(Ptr, PtrAddr);
}
default:
break;
}
Expand Down Expand Up @@ -22513,7 +22651,18 @@ Value *CodeGenFunction::EmitAIE2PBuiltinExpr(unsigned BuiltinID,
case AIE::BI__builtin_aie2p_v64accfloat_to_v64bfp16ebs8:
case AIE::BI__builtin_aie2p_v64accfloat_to_v64bfp16ebs16:
case AIE::BI__builtin_aie2p_v64bfp16ebs8_to_v64bfp16ebs16:
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16: {
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16:
// The FIFO store builtins need no AIE2P-specific handling at this point; they
// share the return below, which forwards the call to EmitAIEBuiltinExpr.
case AIE::BI__builtin_aie2p_fifo_st_push_576_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_push_512_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_push_544_bfp16:
case AIE::BI__builtin_aie2p_fifo_st_flush:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv:
case AIE::BI__builtin_aie2p_fifo_st_flush_1d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_1d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_2d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_2d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_3d_byte:
case AIE::BI__builtin_aie2p_fifo_st_flush_conv_3d_byte: {
return this->EmitAIEBuiltinExpr(BuiltinID, E, Arch);
}
default:
Expand Down
116 changes: 115 additions & 1 deletion clang/lib/Headers/aie2p_ldst.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -81,4 +81,118 @@ INTRINSIC(v64uint16) unpack(v64uint8 v) { return unpack(v, __SIGN_UNSIGNED); }
INTRINSIC(v128int8) unpack(v128int4 v) { return unpack(v, __SIGN_SIGNED); }
INTRINSIC(v128uint8) unpack(v128uint4 v) { return unpack(v, __SIGN_UNSIGNED); }

// Generates fifo_st_reset / fifo_st_push for a plain vector type T. Both go
// through __builtin_aie2p_fifo_st_push_512_bfp16, which receives p and s.fifo
// by reference.
//   fifo_st_reset: pushes v with a position of 0. The position is a local
//                  variable, so the builtin's updated position is discarded
//                  and s.pos is left unchanged.
//                  NOTE(review): confirm that not writing s.pos is intended.
//   fifo_st_push:  pushes v using s.pos, which the builtin receives by
//                  reference.
#define FIFO_ST_PUSH_NORMAL(T) \
  INTRINSIC(void) fifo_st_reset(T *&p, T v, fifo_state_t &s) { \
    int start = 0; \
    __builtin_aie2p_fifo_st_push_512_bfp16((int *&)p, v, s.fifo, start); \
  } \
  INTRINSIC(void) fifo_st_push(T *&p, T v, fifo_state_t &s) { \
    __builtin_aie2p_fifo_st_push_512_bfp16((int *&)p, v, s.fifo, s.pos); \
  }

// Generates fifo_st_reset / fifo_st_push for a bfp16 block type T stored
// through a T##_unaligned pointer. The value is split into its mantissa and
// exponent members and handed to __builtin_aie2p_fifo_st_push_<SIZE>_bfp16,
// which receives p and s.fifo by reference.
//   fifo_st_reset: pushes with a position of 0 held in a local variable, so
//                  s.pos is left unchanged.
//                  NOTE(review): confirm that not writing s.pos is intended.
//   fifo_st_push:  pushes using s.pos, which the builtin receives by
//                  reference.
#define FIFO_ST_PUSH_BFP16(T, SIZE) \
  INTRINSIC(void) fifo_st_reset(T##_unaligned *&p, T v, fifo_state_t &s) { \
    v64char mantissa_part = v.mantissa; \
    v8char exponent_part = v.exponent; \
    int start = 0; \
    __builtin_aie2p_fifo_st_push_##SIZE##_bfp16( \
        (int *&)p, mantissa_part, exponent_part, s.fifo, start); \
  } \
  INTRINSIC(void) fifo_st_push(T##_unaligned *&p, T v, fifo_state_t &s) { \
    v64char mantissa_part = v.mantissa; \
    v8char exponent_part = v.exponent; \
    __builtin_aie2p_fifo_st_push_##SIZE##_bfp16( \
        (int *&)p, mantissa_part, exponent_part, s.fifo, s.pos); \
  }

// Generates the fifo_st_flush<VAR> family for pointer element type T: the
// plain flush plus its 1d/2d/3d "_byte" variants. Each wrapper forwards to the
// matching non-conv __builtin_aie2p_fifo_st_flush* builtin, passing p, s.fifo
// and s.pos (and count1/count2 where present) by reference. VAR only affects
// the generated function names; the builtins called are the same.
#define FIFO_ST_FLUSH_BARE(T, VAR) \
  INTRINSIC(void) fifo_st_flush##VAR(T *&p, fifo_state_t &s) { \
    __builtin_aie2p_fifo_st_flush((int *&)p, s.fifo, s.pos); \
  } \
  INTRINSIC(void) \
  fifo_st_flush##VAR##_1d_byte(T *&p, fifo_state_t &s, int off) { \
    __builtin_aie2p_fifo_st_flush_1d_byte((int *&)p, s.fifo, s.pos, off); \
  } \
  INTRINSIC(void) \
  fifo_st_flush##VAR##_2d_byte(T *&p, fifo_state_t &s, int off, int size1, \
                               addr_t &count1, int inc1) { \
    __builtin_aie2p_fifo_st_flush_2d_byte((int *&)p, s.fifo, s.pos, off, \
                                          size1, count1, inc1); \
  } \
  INTRINSIC(void) \
  fifo_st_flush##VAR##_3d_byte(T *&p, fifo_state_t &s, int off, int size1, \
                               addr_t &count1, int inc1, int size2, \
                               addr_t &count2, int inc2) { \
    __builtin_aie2p_fifo_st_flush_3d_byte((int *&)p, s.fifo, s.pos, off, \
                                          size1, count1, inc1, size2, count2, \
                                          inc2); \
  }

// Generates the fifo_st_flush_conv family for pointer element type T: the
// plain conv flush plus its 1d/2d/3d "_byte" variants. Each wrapper forwards
// to the matching __builtin_aie2p_fifo_st_flush_conv* builtin, passing p,
// s.fifo and s.pos (and count1/count2 where present) by reference.
#define FIFO_ST_FLUSH_CONV(T) \
  INTRINSIC(void) fifo_st_flush_conv(T *&p, fifo_state_t &s) { \
    __builtin_aie2p_fifo_st_flush_conv((int *&)p, s.fifo, s.pos); \
  } \
  INTRINSIC(void) \
  fifo_st_flush_conv_1d_byte(T *&p, fifo_state_t &s, int off) { \
    __builtin_aie2p_fifo_st_flush_conv_1d_byte((int *&)p, s.fifo, s.pos, \
                                               off); \
  } \
  INTRINSIC(void) \
  fifo_st_flush_conv_2d_byte(T *&p, fifo_state_t &s, int off, int size1, \
                             addr_t &count1, int inc1) { \
    __builtin_aie2p_fifo_st_flush_conv_2d_byte((int *&)p, s.fifo, s.pos, off, \
                                               size1, count1, inc1); \
  } \
  INTRINSIC(void) \
  fifo_st_flush_conv_3d_byte(T *&p, fifo_state_t &s, int off, int size1, \
                             addr_t &count1, int inc1, int size2, \
                             addr_t &count2, int inc2) { \
    __builtin_aie2p_fifo_st_flush_conv_3d_byte((int *&)p, s.fifo, s.pos, off, \
                                               size1, count1, inc1, size2, \
                                               count2, inc2); \
  }

// Emits every fifo_st_* overload for a plain vector type T: reset/push, then
// the flush family twice (once with no name suffix, once as "_bare"; both
// expansions call the same builtins), then the flush_conv family.
#define FIFO_ST_NORMAL(T) \
FIFO_ST_PUSH_NORMAL(T) \
FIFO_ST_FLUSH_BARE(T, ) \
FIFO_ST_FLUSH_BARE(T, _bare) \
FIFO_ST_FLUSH_CONV(T)
// Same set of overloads for a bfp16 block type T. Push takes T by value and
// uses the builtin selected by SIZE; all pointer parameters, including those
// of the flush families, use the T##_unaligned type.
#define FIFO_ST_BFP16(T, SIZE) \
FIFO_ST_PUSH_BFP16(T, SIZE) \
FIFO_ST_FLUSH_BARE(T##_unaligned, ) \
FIFO_ST_FLUSH_BARE(T##_unaligned, _bare) \
FIFO_ST_FLUSH_CONV(T##_unaligned)

// Instantiate the FIFO store overloads. For the bfp16 block types the second
// argument is pasted into the push builtin's name
// (__builtin_aie2p_fifo_st_push_<SIZE>_bfp16).
// NOTE(review): SIZE presumably is the stored block size in bits -- confirm.
FIFO_ST_BFP16(v64bfp16ebs8, 576)
FIFO_ST_BFP16(v64bfp16ebs16, 544)
// Plain vector types; all of these push through the 512_bfp16 builtin.
FIFO_ST_NORMAL(v32bfloat16)
FIFO_ST_NORMAL(v16float)
FIFO_ST_NORMAL(v128int4)
FIFO_ST_NORMAL(v128uint4)
FIFO_ST_NORMAL(v64int8)
FIFO_ST_NORMAL(v64uint8)
FIFO_ST_NORMAL(v32int16)
FIFO_ST_NORMAL(v32uint16)
FIFO_ST_NORMAL(v16int32)
FIFO_ST_NORMAL(v16uint32)

#endif // AIE2P_LDST_H
Loading

0 comments on commit e30d6ef

Please sign in to comment.