Skip to content

Commit

Permalink
[X86][AMX-AVX512][NFC] Remove P from intrinsic and instruction name (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
phoebewang authored Jan 17, 2025
1 parent baa5b76 commit 48803bc
Show file tree
Hide file tree
Showing 16 changed files with 177 additions and 177 deletions.
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86_64.td
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2pbf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2pbf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
Expand Down Expand Up @@ -387,8 +387,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2pbf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2pbf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
Expand Down
40 changes: 20 additions & 20 deletions clang/lib/Headers/amxavx512intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
/// __m512i _tile_cvtrowps2bf16h(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
Expand All @@ -80,14 +80,14 @@
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction.
///
/// \param tsrc
/// The source tile. Max size is 1024 Bytes.
/// \param row
/// The row of the source tile.
#define _tile_cvtrowps2pbf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
#define _tile_cvtrowps2bf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2bf16h(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
Expand All @@ -97,7 +97,7 @@
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
/// __m512i _tile_cvtrowps2bf16l(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
Expand All @@ -117,14 +117,14 @@
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction.
///
/// \param tsrc
/// The source tile. Max size is 1024 Bytes.
/// \param row
/// The row of the source tile.
#define _tile_cvtrowps2pbf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
#define _tile_cvtrowps2bf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2bf16l(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
Expand Down Expand Up @@ -238,15 +238,15 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
Expand Down Expand Up @@ -290,7 +290,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16H </c> instruction.
///
/// \param src0
/// The 1st source tile. Max size is 1024 Bytes.
Expand All @@ -299,8 +299,8 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
Expand All @@ -309,7 +309,7 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16L </c> instruction.
///
/// \param src0
/// The 1st source tile. Max size is 1024 Bytes.
Expand All @@ -318,8 +318,8 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,8 +641,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_t2rpntlvwz1rs:
case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
case X86::BI__builtin_ia32_t2rpntlvwz0rs:
case X86::BI__builtin_ia32_tcvtrowps2pbf16h:
case X86::BI__builtin_ia32_tcvtrowps2pbf16l:
case X86::BI__builtin_ia32_tcvtrowps2bf16h:
case X86::BI__builtin_ia32_tcvtrowps2bf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
case X86::BI__builtin_ia32_tcvtrowps2phl:
case X86::BI__builtin_ia32_tcvtrowd2ps:
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/X86/amx_avx512_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
return __tile_cvtrowd2ps(a, b);
}

__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16h
__m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal
return __tile_cvtrowps2pbf16h(a, b);
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal
return __tile_cvtrowps2bf16h(a, b);
}

__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16l
__m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal
return __tile_cvtrowps2pbf16l(a, b);
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal
return __tile_cvtrowps2bf16l(a, b);
}

__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/X86/amxavx512-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) {
return _tile_cvtrowd2ps(1, A);
}

__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16h(1, A);
__m512bh test_tile_cvtrowps2bf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16h(1, A);
}

__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16l(1, A);
__m512bh test_tile_cvtrowps2bf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16l(1, A);
}

__m512h test_tile_cvtrowps2phh(unsigned int A) {
Expand Down
14 changes: 7 additions & 7 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -5999,10 +5999,10 @@ let TargetPrefix = "x86" in {
def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2pbf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h">,
def int_x86_tcvtrowps2bf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h">,
Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2pbf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l">,
def int_x86_tcvtrowps2bf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l">,
Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2phh : ClangBuiltin<"__builtin_ia32_tcvtrowps2phh">,
Expand Down Expand Up @@ -6181,13 +6181,13 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v16f32_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
def int_x86_tcvtrowps2pbf16h_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h_internal">,
def int_x86_tcvtrowps2bf16h_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h_internal">,
Intrinsic<[llvm_v32bf16_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
def int_x86_tcvtrowps2pbf16l_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l_internal">,
def int_x86_tcvtrowps2bf16l_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l_internal">,
Intrinsic<[llvm_v32bf16_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
Expand Down Expand Up @@ -7893,4 +7893,4 @@ def int_x86_movrsdi : ClangBuiltin<"__builtin_ia32_movrsdi">,
[IntrReadMem]>;
def int_x86_prefetchrs : ClangBuiltin<"__builtin_ia32_prefetchrs">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
}
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTILELOADDRST1V:
case X86::PTCVTROWD2PSrreV:
case X86::PTCVTROWD2PSrriV:
case X86::PTCVTROWPS2PBF16HrreV:
case X86::PTCVTROWPS2PBF16HrriV:
case X86::PTCVTROWPS2PBF16LrreV:
case X86::PTCVTROWPS2PBF16LrriV:
case X86::PTCVTROWPS2BF16HrreV:
case X86::PTCVTROWPS2BF16HrriV:
case X86::PTCVTROWPS2BF16LrreV:
case X86::PTCVTROWPS2BF16LrriV:
case X86::PTCVTROWPS2PHHrreV:
case X86::PTCVTROWPS2PHHrriV:
case X86::PTCVTROWPS2PHLrreV:
Expand Down Expand Up @@ -595,17 +595,17 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTCVTROWD2PSrriV:
Opc = X86::TCVTROWD2PSrri;
break;
case X86::PTCVTROWPS2PBF16HrreV:
Opc = X86::TCVTROWPS2PBF16Hrre;
case X86::PTCVTROWPS2BF16HrreV:
Opc = X86::TCVTROWPS2BF16Hrre;
break;
case X86::PTCVTROWPS2PBF16HrriV:
Opc = X86::TCVTROWPS2PBF16Hrri;
case X86::PTCVTROWPS2BF16HrriV:
Opc = X86::TCVTROWPS2BF16Hrri;
break;
case X86::PTCVTROWPS2PBF16LrreV:
Opc = X86::TCVTROWPS2PBF16Lrre;
case X86::PTCVTROWPS2BF16LrreV:
Opc = X86::TCVTROWPS2BF16Lrre;
break;
case X86::PTCVTROWPS2PBF16LrriV:
Opc = X86::TCVTROWPS2PBF16Lrri;
case X86::PTCVTROWPS2BF16LrriV:
Opc = X86::TCVTROWPS2BF16Lrri;
break;
case X86::PTCVTROWPS2PHHrreV:
Opc = X86::TCVTROWPS2PHHrre;
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37892,8 +37892,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTCVTROWPS2PBF16Hrri:
case X86::PTCVTROWPS2PBF16Lrri:
case X86::PTCVTROWPS2BF16Hrri:
case X86::PTCVTROWPS2BF16Lrri:
case X86::PTCVTROWPS2PHHrri:
case X86::PTCVTROWPS2PHLrri:
case X86::PTCVTROWD2PSrri:
Expand All @@ -37906,14 +37906,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTCVTROWD2PSrri:
Opc = X86::TCVTROWD2PSrri;
break;
case X86::PTCVTROWPS2PBF16Hrri:
Opc = X86::TCVTROWPS2PBF16Hrri;
case X86::PTCVTROWPS2BF16Hrri:
Opc = X86::TCVTROWPS2BF16Hrri;
break;
case X86::PTCVTROWPS2PHHrri:
Opc = X86::TCVTROWPS2PHHrri;
break;
case X86::PTCVTROWPS2PBF16Lrri:
Opc = X86::TCVTROWPS2PBF16Lrri;
case X86::PTCVTROWPS2BF16Lrri:
Opc = X86::TCVTROWPS2BF16Lrri;
break;
case X86::PTCVTROWPS2PHLrri:
Opc = X86::TCVTROWPS2PHLrri;
Expand All @@ -37930,8 +37930,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTCVTROWPS2PBF16Hrre:
case X86::PTCVTROWPS2PBF16Lrre:
case X86::PTCVTROWPS2BF16Hrre:
case X86::PTCVTROWPS2BF16Lrre:
case X86::PTCVTROWPS2PHHrre:
case X86::PTCVTROWPS2PHLrre:
case X86::PTCVTROWD2PSrre:
Expand All @@ -37944,11 +37944,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTCVTROWD2PSrre:
Opc = X86::TCVTROWD2PSrre;
break;
case X86::PTCVTROWPS2PBF16Hrre:
Opc = X86::TCVTROWPS2PBF16Hrre;
case X86::PTCVTROWPS2BF16Hrre:
Opc = X86::TCVTROWPS2BF16Hrre;
break;
case X86::PTCVTROWPS2PBF16Lrre:
Opc = X86::TCVTROWPS2PBF16Lrre;
case X86::PTCVTROWPS2BF16Lrre:
Opc = X86::TCVTROWPS2BF16Lrre;
break;
case X86::PTCVTROWPS2PHHrre:
Opc = X86::TCVTROWPS2PHHrre;
Expand Down
44 changes: 22 additions & 22 deletions llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -585,26 +585,26 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
[(set VR512: $dst,
(int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PBF16HrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2PBF16HrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PBF16LrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2PBF16LrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
Expand Down Expand Up @@ -654,8 +654,8 @@ multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,

defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
defm TCVTROWPS2PBF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2pbf16h", XD, XD>;
defm TCVTROWPS2PBF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2pbf16l", XS, XS>;
defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;

multiclass m_tilemovrow {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
Expand Down
Loading

0 comments on commit 48803bc

Please sign in to comment.