[NFC][AIE] Split AIELegalizerInfo into AIE1LegalizerInfo and AIE2LegalizerInfo
Xilinx · Aug 21, 2024 · 23842e9 · 23842e9
1 parent 5638f64
commit 23842e9
Show file tree

Hide file tree

Showing 7 changed files with 843 additions and 593 deletions.
diff --git a/llvm/lib/Target/AIE/AIE1LegalizerInfo.cpp b/llvm/lib/Target/AIE/AIE1LegalizerInfo.cpp
@@ -0,0 +1,299 @@
+//===- AIE1LegalizerInfo.cpp ----------------------------------------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for AIE1.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AIE1LegalizerInfo.h"
+#include "AIEBaseISelLowering.h"
+#include "AIEBaseSubtarget.h"
+#include "AIELegalizerHelper.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
+#include <initializer_list>
+
+using namespace llvm;
+
+AIE1LegalizerInfo::AIE1LegalizerInfo(const AIEBaseSubtarget &ST)
+    : AIEHelper(ST) { // Declares the per-opcode legalization rules for AIE1.
+  using namespace TargetOpcode; // Lets the G_* opcodes be named unqualified.
+  const LLT S8 = LLT::scalar(8);
+  const LLT S16 = LLT::scalar(16);
+  const LLT S20 = LLT::scalar(20);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+  const LLT P0 = LLT::pointer(0, 20); // Address space 0, 20 bits wide.
+
+  // 128-bit vectors
+  const LLT V16S8 = LLT::fixed_vector(16, 8);
+  const LLT V8S16 = LLT::fixed_vector(8, 16);
+  const LLT V4S32 = LLT::fixed_vector(4, 32);
+
+  // 256-bit vectors
+  const LLT V8S32 = LLT::fixed_vector(8, 32);
+  const LLT V16S16 = LLT::fixed_vector(16, 16);
+  const LLT V32S8 = LLT::fixed_vector(32, 8);
+
+  // 256-bit accumulators (described as vectors of 64-bit elements)
+  const LLT ACC256 = LLT::fixed_vector(4, 64);
+
+  // 512-bit vectors
+  const LLT V16S32 = LLT::fixed_vector(16, 32);
+  const LLT V32S16 = LLT::fixed_vector(32, 16);
+  const LLT V64S8 = LLT::fixed_vector(64, 8);
+
+  // 512-bit accumulators (described as vectors of 64-bit elements)
+  const LLT ACC512 = LLT::fixed_vector(8, 64);
+
+  // 1024-bit vectors
+  const LLT V32S32 = LLT::fixed_vector(32, 32);
+  const LLT V64S16 = LLT::fixed_vector(64, 16);
+  const LLT V128S8 = LLT::fixed_vector(128, 8);
+
+  // 1024-bit accumulators (described as vectors of 64-bit elements)
+  const LLT ACC1024 = LLT::fixed_vector(16, 64);
+
+  const LLT S128 = LLT::scalar(128);
+
+  auto &IMPLICIT = getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
+                       .legalFor({S20, S32, P0, S128});
+
+  IMPLICIT.widenScalarToNextPow2(0).clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_CONSTANT)
+      .legalFor({S20, S32, P0})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // FIXME: AIE1 actually supports float; FP constants are custom-legalized.
+  getActionDefinitionsBuilder(G_FCONSTANT).customFor({S16, S32, S64});
+
+  getActionDefinitionsBuilder(G_ICMP)
+      .legalFor({{S32, S32}, {S32, P0}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FADD, G_FSUB, G_FREM})
+      .libcallFor({S32, S64});
+
+  // Since the only integers smaller than 32 bits we produce are S20 (from
+  // G_PTRTOINT), the only legal extension is S20 -> S32.
+  // Extensions to types larger than 64 bits have to be broken down into
+  // multiple parts.
+  getActionDefinitionsBuilder({G_ANYEXT, G_SEXT, G_ZEXT})
+      .legalFor({{S32, S20}})
+      .clampScalar(0, S32, S32);
+  // FIXME: (s|z|any)ext s20 to s64 is broken.
+
+  getActionDefinitionsBuilder({G_AND, G_OR})
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_XOR)
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_SEXT_INREG).custom();
+
+  getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
+      .legalFor({{S32, S32}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+
+  getActionDefinitionsBuilder(G_SELECT)
+      .legalFor({{S32, S32}, {P0, S32}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder({G_ADD, G_SUB})
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // FIXME: G_SADDE/G_SSUBE don't support lowering. To support them properly,
+  // the action needs to be implemented.
+  getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_UADDE, G_USUBE, G_SADDO,
+                               G_SSUBO, G_SADDE, G_SSUBE, G_UADDSAT, G_USUBSAT,
+                               G_SADDSAT, G_SSUBSAT})
+      .lower();
+
+  getActionDefinitionsBuilder(G_MUL)
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .minScalar(0, S32)
+      .libcallFor({S64});
+
+  // FIXME: Add G_SMULO and G_UMULO support.
+  getActionDefinitionsBuilder({G_UMULH, G_SMULH}).lower();
+
+  getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
+      .libcallFor({S32, S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S64);
+
+  getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM})
+      .lowerFor({S32, S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S64);
+
+  getActionDefinitionsBuilder(G_ABS)
+      .legalFor({S32})
+      .lowerFor({S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // The CLZ instruction implements CTLZ, which also covers CTLZ_ZERO_UNDEF.
+  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+      .lowerFor({{S32, S32}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder(G_CTLZ)
+      .legalFor({{S32, S32}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+
+  getActionDefinitionsBuilder({G_MEMCPY, G_MEMSET, G_MEMMOVE})
+      .customIf([=](const LegalityQuery &Query) {
+        const LLT SizeArg = Query.Types[2];
+        return SizeArg == S20; // Custom only when the size operand is s20.
+      })
+      .libcall(); // Every other form becomes a library call.
+
+  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
+  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
+
+  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+      .widenScalarToNextPow2(0, 32)
+      .lower();
+
+  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({P0});
+
+  getActionDefinitionsBuilder(G_INTTOPTR)
+      .legalFor({{P0, S20}})
+      .widenScalarToNextPow2(1)
+      .clampScalar(1, S20, S20);
+
+  getActionDefinitionsBuilder(G_PTRTOINT)
+      .legalFor({{S20, P0}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S20, S20);
+
+  // We support pointer arithmetic on both GPRs (32-bits) and pointer regs
+  // (20-bits, where the scalar addend resides in a MOD register). To allow
+  // specifying alternative register bank mappings, we need to truncate the RHS
+  // operand to 20-bits, thus we only allow s20 types for the scalar addend.
+  getActionDefinitionsBuilder(G_PTR_ADD)
+      .legalFor({{P0, S20}})
+      .widenScalarToNextPow2(1)
+      .clampScalar(1, S20, S20);
+
+  getActionDefinitionsBuilder({G_LOAD, G_STORE})
+      .legalForTypesWithMemDesc({
+          {S32, P0, S8, 8},         {S32, P0, S16, 16},
+          {S20, P0, S20, 32},       {S32, P0, S32, 32},
+          {P0, P0, S20, 32},        {V16S8, P0, V16S8, 16},
+          {V8S16, P0, V8S16, 16},   {V4S32, P0, V4S32, 16},
+          {V8S32, P0, V8S32, 32},   {V16S16, P0, V16S16, 32},
+          {V32S8, P0, V32S8, 32},   {V16S32, P0, V16S32, 32},
+          {V32S16, P0, V32S16, 32}, {V64S8, P0, V64S8, 32},
+          {V32S32, P0, V32S32, 32}, {V64S16, P0, V64S16, 32},
+          {V128S8, P0, V128S8, 32}, {ACC256, P0, ACC256, 32},
+          {ACC512, P0, ACC512, 32}, {ACC1024, P0, ACC1024, 32},
+          {S128, P0, S128, 16},
+      })
+      .widenScalarToNextPow2(0)
+      .lowerIfMemSizeNotPow2()
+      .bitcastIf( // Treat 32/64-bit vectors as scalars of the same size.
+          [=](const LegalityQuery &Query) {
+            const LLT &Ty = Query.Types[0];
+            return Ty.isVector() &&
+                   (Ty.getSizeInBits() == 64 || Ty.getSizeInBits() == 32);
+          },
+          [=](const LegalityQuery &Query) {
+            const LLT Ty = Query.Types[0];
+            const unsigned Size = Ty.getSizeInBits();
+            assert(Size % 32 == 0); // Guaranteed by the predicate above.
+            return std::pair(0, LLT::scalar(Size));
+          })
+      .clampScalar(0, S32, S32)
+      .lower();
+
+  // FIXME: Storing a pointer to an un-aligned address isn't supported.
+  getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
+      .legalForTypesWithMemDesc({{S32, P0, S8, 8}, {S32, P0, S16, 16}})
+      .widenScalarToNextPow2(0)
+      .lowerIfMemSizeNotPow2()
+      .clampScalar(0, S32, S32)
+      .lower();
+
+  // Control-flow
+  getActionDefinitionsBuilder(G_BRCOND).legalFor({S32}).clampScalar(0, S32,
+                                                                    S32);
+
+  getActionDefinitionsBuilder(G_PHI)
+      .legalFor({S20, S32, P0})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_JUMP_TABLE).custom();
+
+  getActionDefinitionsBuilder(G_BRJT).custom();
+
+  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({P0});
+
+  // Variadic functions
+  getActionDefinitionsBuilder(G_VASTART).custom();
+  getActionDefinitionsBuilder(G_VAARG).custom();
+
+  getLegacyLegalizerInfo().computeTables();
+  verify(*ST.getInstrInfo());
+}
+
+bool AIE1LegalizerInfo::legalizeCustom(
+    LegalizerHelper &Helper, MachineInstr &MI,
+    LostDebugLocObserver &LocObserver) const { // Handles ops marked custom.
+  switch (MI.getOpcode()) { // Each case forwards to the shared AIEHelper.
+  default:
+    break; // Unhandled opcode: reaches the llvm_unreachable below.
+  case TargetOpcode::G_VASTART:
+    return AIEHelper.legalizeG_VASTART(Helper, MI);
+  case TargetOpcode::G_VAARG:
+    return AIEHelper.legalizeG_VAARG(Helper, MI);
+  case TargetOpcode::G_MEMSET:
+  case TargetOpcode::G_MEMCPY:
+  case TargetOpcode::G_MEMMOVE:
+    return AIEHelper.legalizeMemCalls(Helper, MI, LocObserver);
+  case TargetOpcode::G_BRJT:
+    return AIEHelper.legalizeG_BRJT(Helper, MI);
+  case TargetOpcode::G_FCONSTANT:
+    return AIEHelper.legalizeG_FCONSTANT(Helper, MI);
+  case TargetOpcode::G_JUMP_TABLE:
+    return AIEHelper.legalizeG_JUMP_TABLE(Helper, MI);
+  case TargetOpcode::G_DYN_STACKALLOC:
+    return AIEHelper.legalizeG_DYN_STACKALLOC(Helper, MI);
+  case TargetOpcode::G_SEXT_INREG:
+    return AIEHelper.legalizeG_SEXT_INREG(Helper, MI);
+  }
+
+  llvm_unreachable("Un-expected custom legalization");
+}
diff --git a/llvm/lib/Target/AIE/AIELegalizerInfo.h → llvm/lib/Target/AIE/AIE1LegalizerInfo.h b/llvm/lib/Target/AIE/AIELegalizerInfo.h → llvm/lib/Target/AIE/AIE1LegalizerInfo.h
@@ -12,8 +12,8 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIB_TARGET_AIE_AIEMACHINELEGALIZER_H
-#define LLVM_LIB_TARGET_AIE_AIEMACHINELEGALIZER_H
+#ifndef LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H
 
 #include "AIELegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -25,15 +25,13 @@ namespace llvm {
 class AIEBaseSubtarget;
 
 /// This class provides legalization strategies.
-class AIELegalizerInfo : public LegalizerInfo {
+class AIE1LegalizerInfo : public LegalizerInfo {
   AIELegalizerHelper AIEHelper;
 
 public:
-  AIELegalizerInfo(const AIEBaseSubtarget &ST);
+  AIE1LegalizerInfo(const AIEBaseSubtarget &ST);
   bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                       LostDebugLocObserver &LocObserver) const override;
-  bool legalizeIntrinsic(LegalizerHelper &Helper,
-                         MachineInstr &MI) const override;
 };
 } // end namespace llvm
 #endif