Skip to content

Commit

Permalink
[AIE2P][AA] added AIE2P intrinsics to AA
Browse files Browse the repository at this point in the history
  • Loading branch information
F-Stuckmann committed Jan 31, 2025
1 parent ce3dc2c commit a149a57
Show file tree
Hide file tree
Showing 3 changed files with 970 additions and 1 deletion.
88 changes: 87 additions & 1 deletion llvm/lib/Target/AIE/AIEBaseAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
Expand All @@ -22,6 +22,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAIE2.h"
#include "llvm/IR/IntrinsicsAIE2P.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -75,6 +76,32 @@ static bool isAIEPtrAddIntrinsic(Intrinsic::ID ID, unsigned &InPtrIdx) {
switch (ID) {
case Intrinsic::aie2_add_2d:
case Intrinsic::aie2_add_3d:
case Intrinsic::aie2p_add_2d:
case Intrinsic::aie2p_add_3d:
case Intrinsic::aie2p_fifo_ld_fill:
case Intrinsic::aie2p_fifo_ld_pop_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_1d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_2d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_3d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_544_3d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_3d_bfp16:
case Intrinsic::aie2p_fifo_st_flush:
case Intrinsic::aie2p_fifo_st_flush_1d:
case Intrinsic::aie2p_fifo_st_flush_1d_conv:
case Intrinsic::aie2p_fifo_st_flush_2d:
case Intrinsic::aie2p_fifo_st_flush_2d_conv:
case Intrinsic::aie2p_fifo_st_flush_3d:
case Intrinsic::aie2p_fifo_st_flush_3d_conv:
case Intrinsic::aie2p_fifo_st_flush_conv:
case Intrinsic::aie2p_fifo_st_push_512_bfp16:
case Intrinsic::aie2p_fifo_st_push_544_bfp16:
case Intrinsic::aie2p_fifo_st_push_576_bfp16:
InPtrIdx = 0;
return true;
default:
Expand All @@ -93,6 +120,36 @@ static SmallVector<unsigned, 5> getAddIntrinsicOps(Intrinsic::ID ID) {
case Intrinsic::aie2_add_3d:
return {1, 2, 3, 4, /*,count1*/ 6,
/*,count2*/};
case Intrinsic::aie2p_add_2d:
return {1, 2, 3 /*,count*/};
case Intrinsic::aie2p_add_3d:
return {1, 2, 3, 4, /*,count1*/ 6,
/*,count2*/};
case Intrinsic::aie2p_fifo_ld_fill:
case Intrinsic::aie2p_fifo_ld_pop_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_1d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_2d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_3d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_544_3d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_3d_bfp16:
case Intrinsic::aie2p_fifo_st_flush:
case Intrinsic::aie2p_fifo_st_flush_1d:
case Intrinsic::aie2p_fifo_st_flush_1d_conv:
case Intrinsic::aie2p_fifo_st_flush_2d:
case Intrinsic::aie2p_fifo_st_flush_2d_conv:
case Intrinsic::aie2p_fifo_st_flush_3d:
case Intrinsic::aie2p_fifo_st_flush_3d_conv:
case Intrinsic::aie2p_fifo_st_flush_conv:
case Intrinsic::aie2p_fifo_st_push_512_bfp16:
case Intrinsic::aie2p_fifo_st_push_544_bfp16:
case Intrinsic::aie2p_fifo_st_push_576_bfp16:
return {};
default:
llvm_unreachable("Unknown intrinsic");
}
Expand All @@ -107,6 +164,35 @@ static SmallVector<unsigned, 5> getAddIntrinsicCounterOps(Intrinsic::ID ID) {
return {4};
case Intrinsic::aie2_add_3d:
return {5, 7};
case Intrinsic::aie2p_add_2d:
return {4};
case Intrinsic::aie2p_add_3d:
return {5, 7};
case Intrinsic::aie2p_fifo_ld_fill:
case Intrinsic::aie2p_fifo_ld_pop_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_1d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_1d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_2d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_3d_unaligned:
case Intrinsic::aie2p_fifo_ld_pop_544_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_2d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_544_3d_bfp16:
case Intrinsic::aie2p_fifo_ld_pop_576_3d_bfp16:
case Intrinsic::aie2p_fifo_st_flush:
case Intrinsic::aie2p_fifo_st_flush_1d:
case Intrinsic::aie2p_fifo_st_flush_1d_conv:
case Intrinsic::aie2p_fifo_st_flush_2d:
case Intrinsic::aie2p_fifo_st_flush_2d_conv:
case Intrinsic::aie2p_fifo_st_flush_3d:
case Intrinsic::aie2p_fifo_st_flush_3d_conv:
case Intrinsic::aie2p_fifo_st_flush_conv:
case Intrinsic::aie2p_fifo_st_push_512_bfp16:
case Intrinsic::aie2p_fifo_st_push_544_bfp16:
case Intrinsic::aie2p_fifo_st_push_576_bfp16:
return {};
default:
llvm_unreachable("Unknown intrinsic");
}
Expand Down
214 changes: 214 additions & 0 deletions llvm/test/CodeGen/AIE/alias-analysis-fifo-vst-vld-scheduling.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple=aie2p --start-after=machine-scheduler -o - %s | FileCheck %s

# The load and store pointers are noalias, so no alias conflicts are expected between the FIFO loads and stores.

--- |
; ModuleID = 'llvm/test/CodeGen/AIE/alias-analysis-fifo-vst-vld-scheduling.ll'
source_filename = "0_0/src/0_0.cc"
target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32"
target triple = "aie2p"

$concurrentLoadStore = comdat any

; Function Attrs: mustprogress noinline
; Test kernel: runs a load-FIFO fill/pop pair on %in and a store-FIFO
; push/flush pair on %out. Both pointers are declared noalias. None of the
; loaded or returned values are used afterwards; the function returns void.
define weak_odr dso_local void @concurrentLoadStore(ptr noalias %in, ptr noalias %out, ptr noalias nonnull align 64 dereferenceable(64) %params) local_unnamed_addr #0 comdat {
; CHECK-LABEL: concurrentLoadStore:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mova r26, #0; nopb ; nops ; nopx ; mov p2, p1; nopv
; CHECK-NEXT: nopx ; vbcst.32 x0, r26
; CHECK-NEXT: vbcst.8 x2, r26
; CHECK-NEXT: mov r24, r26
; CHECK-NEXT: mov r27, r26
; CHECK-NEXT: mov el2, r26
; CHECK-NEXT: mov eh2, r27
; CHECK-NEXT: vlda.fill.512 [p0, lf0, r24]; vmov x1, x0
; CHECK-NEXT: vldb.pop.512 x4, [p0, lf0, r24]; vmov lfl0, x0
; CHECK-NEXT: vmov lfh0, x1
; CHECK-NEXT: ret lr; vmov sfl, x0
; CHECK-NEXT: vmov sfh, x1 // Delay Slot 5
; CHECK-NEXT: vst.push.576 ex2, [p2, sf, r26] // Delay Slot 4
; CHECK-NEXT: vst.flush.512 [p2, sf, r26] // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
br label %bb.exit

bb.exit: ; preds = %entry
; Load side: fill from %in with an all-zero <32 x i32> operand and i32 0, then
; pop using the three values returned by the fill.
; NOTE(review): the { ptr, <32 x i32>, i32 } fields are presumably the updated
; pointer, FIFO contents and available count - confirm against the intrinsic
; definition.
%I.7 = call { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.ld.fill(ptr %in, <32 x i32> zeroinitializer, i32 0)
%I.8 = extractvalue { ptr, <32 x i32>, i32 } %I.7, 0
%I.9 = extractvalue { ptr, <32 x i32>, i32 } %I.7, 1
%I.10 = extractvalue { ptr, <32 x i32>, i32 } %I.7, 2
%I.11 = tail call { <64 x i8>, ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.ld.pop.unaligned(ptr %I.8, <32 x i32> %I.9, i32 %I.10)
; The values extracted from the pop result below have no users in this function.
%I.12 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32 } %I.11, 1
%I.13 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32 } %I.11, 2
%I.14 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32 } %I.11, 3
%I.15 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32 } %I.11, 0
; Constant-zero i64 reinterpreted as <8 x i8> for the third operand of the push.
; NOTE(review): presumably the 64-bit exponent part of the 576-bit bfp16 value
; - confirm.
%vec = or i64 0, 0
%i8vec = bitcast i64 %vec to <8 x i8>
; Store side: push all-zero data through %out, then flush using the three
; values returned by the push.
%0 = call { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.st.push.576.bfp16(ptr %out, <64 x i8> zeroinitializer, <8 x i8> %i8vec, <32 x i32> zeroinitializer, i32 0)
%1 = extractvalue { ptr, <32 x i32>, i32 } %0, 0
%2 = extractvalue { ptr, <32 x i32>, i32 } %0, 1
%3 = extractvalue { ptr, <32 x i32>, i32 } %0, 2
%4 = call { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.st.flush(ptr %1, <32 x i32> %2, i32 %3)
; The values extracted from the flush result below have no users in this function.
%5 = extractvalue { ptr, <32 x i32>, i32 } %4, 0
%6 = extractvalue { ptr, <32 x i32>, i32 } %4, 1
%7 = extractvalue { ptr, <32 x i32>, i32 } %4, 2
ret void
}

; Function Attrs: nounwind memory(argmem: read)
declare { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.ld.fill(ptr, <32 x i32>, i32) #1

; Function Attrs: nounwind memory(none)
declare <32 x float> @llvm.aie2p.v32bf16.to.v32accfloat(<32 x bfloat>) #2

; Function Attrs: nounwind memory(argmem: read)
declare { <64 x i8>, ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.ld.pop.unaligned(ptr, <32 x i32>, i32) #1

; Function Attrs: nounwind memory(argmem: read)
declare { <64 x i8>, ptr, <32 x i32>, i32, i20 } @llvm.aie2p.fifo.ld.pop.2d.unaligned(ptr, <32 x i32>, i32, i20, i20, i20, i20) #1

; Function Attrs: nounwind memory(argmem: write)
declare { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.st.push.576.bfp16(ptr, <64 x i8>, <8 x i8>, <32 x i32>, i32) #3

; Function Attrs: nounwind memory(inaccessiblemem: read)
declare { <64 x i8>, <8 x i8> } @llvm.aie2p.v64accfloat.to.v64bfp16ebs8(<64 x i8>, <8 x i8>, <64 x float>) #4

; Function Attrs: nounwind memory(argmem: write)
declare { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.st.flush(ptr, <32 x i32>, i32) #3

; Function Attrs: nounwind memory(argmem: write)
declare { ptr, <32 x i32>, i32, i20 } @llvm.aie2p.fifo.st.flush.2d(ptr, <32 x i32>, i32, i20, i20, i20, i20) #3

attributes #0 = { mustprogress noinline "no-jump-tables"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { nounwind memory(argmem: read) }
attributes #2 = { nounwind memory(none) }
attributes #3 = { nounwind memory(argmem: write) }
attributes #4 = { nounwind memory(inaccessiblemem: read) }

!llvm.linker.options = !{}
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 19.0.0git (/scratch/llvm-aie/clang 5c68f0e173ec03a2b4f908018663ca52923e52fb)"}

...
---
name: concurrentLoadStore
alignment: 16
exposesReturnsTwice: false
legalized: true
regBankSelected: true
selected: true
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: eps, preferred-register: '' }
- { id: 1, class: mpfs, preferred-register: '' }
- { id: 2, class: _, preferred-register: '' }
- { id: 3, class: eps, preferred-register: '' }
- { id: 4, class: eldfiforeg, preferred-register: '' }
- { id: 5, class: erf2, preferred-register: '' }
- { id: 6, class: vec1024, preferred-register: '' }
- { id: 7, class: mr26_fifo_st, preferred-register: '' }
- { id: 8, class: vec512, preferred-register: '' }
- { id: 9, class: eps, preferred-register: '' }
- { id: 10, class: eldfiforeg, preferred-register: '' }
- { id: 11, class: erf2, preferred-register: '' }
- { id: 12, class: _, preferred-register: '' }
- { id: 13, class: el_with_sub_l_even_in_er26, preferred-register: '' }
- { id: 14, class: el, preferred-register: '' }
- { id: 15, class: mpfs, preferred-register: '' }
- { id: 16, class: mstfifo, preferred-register: '' }
- { id: 17, class: mr26_fifo_st, preferred-register: '' }
- { id: 18, class: vec512, preferred-register: '' }
- { id: 19, class: _, preferred-register: '' }
- { id: 20, class: ep_as_32bit_and_mpfs, preferred-register: '' }
- { id: 21, class: mstfifo, preferred-register: '' }
- { id: 22, class: mr26_fifo_st, preferred-register: '' }
- { id: 23, class: exe, preferred-register: '' }
- { id: 24, class: _, preferred-register: '' }
- { id: 25, class: er, preferred-register: '' }
- { id: 26, class: eldfiforeg, preferred-register: '' }
- { id: 27, class: mstfifo, preferred-register: '' }
- { id: 28, class: vec576, preferred-register: '' }
- { id: 29, class: erf2, preferred-register: '' }
- { id: 30, class: exo, preferred-register: '' }
- { id: 31, class: epsrfldf, preferred-register: '' }
- { id: 32, class: epsrfldf, preferred-register: '' }
liveins: []
calleeSavedRegisters: [ '$lr', '$r8', '$r9', '$r10', '$r11', '$r12', '$r13',
'$r14', '$r15', '$p6', '$p7' ]
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo:
varArgsFrameIndex: 0
body: |
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $p0, $p1, $p2
; Setup: %32.sub_ptr takes $p0 (used by the load FIFO instructions below) and
; %20 takes $p1 (used by the store FIFO instructions below). $p2 is live-in
; but not read in this body.
undef %32.sub_ptr:epsrfldf = COPY $p0
%20:ep_as_32bit_and_mpfs = COPY $p1
; Materialize the constant 0 and broadcast it to build the vector values that
; are copied into the FIFO operands in bb.1.
undef %13.sub_l_even:el_with_sub_l_even_in_er26 = MOV_RLC_imm11_pseudo 0
undef %6.sub_512_lo:vec1024 = VBCST_32 %13.sub_l_even
%6.sub_512_hi:vec1024 = COPY %6.sub_512_lo
undef %28.sub_bfp16_x:vec576 = VBCST_8 %13.sub_l_even
bb.1.bb.exit:
; Load FIFO sequence: initialise the fifo/avail subregisters of %32, then
; fill and pop. All results of the pop are marked dead.
%32.sub_fifo:epsrfldf = COPY %6
%32.sub_avail:epsrfldf = COPY %13.sub_l_even
%32.sub_ptr:epsrfldf, %32.sub_fifo:epsrfldf, %32.sub_avail:epsrfldf = VLDA_FILL_512 %32.sub_ptr, %32.sub_fifo, %32.sub_avail
dead %8:vec512, dead %32.sub_ptr:epsrfldf, dead %32.sub_fifo:epsrfldf, dead %32.sub_avail:epsrfldf = VLDB_POP_512_normal_pop %32.sub_ptr, %32.sub_fifo, %32.sub_avail, implicit-def $srfifo_uf
; Store FIFO sequence: build the store FIFO (%21) and the 576-bit data value
; (%28), then push and flush through the pointer in %20. All results of the
; flush are marked dead.
%13.sub_l_odd:el_with_sub_l_even_in_er26 = COPY %13.sub_l_even
%21:mstfifo = COPY %6
%28.sub_bfp16_e:vec576 = COPY %13
%21:mstfifo, %20:ep_as_32bit_and_mpfs, undef %13.sub_l_even:el_with_sub_l_even_in_er26 = VST_PUSH_576 %21, %28, %20, %13.sub_l_even, implicit-def $srfifo_of
dead %21:mstfifo, dead %20:ep_as_32bit_and_mpfs, dead %13.sub_l_even:el_with_sub_l_even_in_er26 = VST_FLUSH_512_normal_flush %21, %20, %13.sub_l_even, implicit-def $srfifo_of
PseudoRET implicit $lr
...
Loading

0 comments on commit a149a57

Please sign in to comment.