Skip to content

Commit

Permalink
fixup! do not use CSAState
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelmaitland committed Nov 5, 2024
1 parent 9cccaa3 commit e55606a
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 301 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class VPBuilder {
}

VPInstruction *createAnyActive(VPValue *Cond, DebugLoc DL,
const Twine &Name) {
const Twine &Name) {
return createInstruction(VPInstruction::AnyActive, {Cond}, DL, Name);
}

Expand All @@ -249,7 +249,7 @@ class VPBuilder {
}

VPInstruction *createAnyActiveEVL(VPValue *Cond, VPValue *EVL, DebugLoc DL,
const Twine &Name) {
const Twine &Name) {
return createInstruction(VPInstruction::AnyActiveEVL, {Cond, EVL}, DL,
Name);
}
Expand Down
136 changes: 49 additions & 87 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2971,7 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(nullptr),
IVEndValues[Entry.first], LoopMiddleBlock, State);
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}

for (Instruction *PI : PredicatedInstructions)
Expand Down Expand Up @@ -8705,13 +8704,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
} else if (Legal->isCSAPhi(Phi)) {
VPCSAState *State = Plan.getCSAStates().find(Phi)->second;
VPValue *InitData = State->getVPInitData();
VPValue *InitScalar = Plan.getOrAddLiveIn(
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// Don't build full CSA for VF=ElementCount::getFixed(1)
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return VF.isScalar(); }, Range);

// When the VF=getFixed(1), InitData is just InitScalar.
if (!InitData)
InitData = State->getVPInitScalar();
VPValue *InitData =
IsScalarVF ? InitScalar
: getVPValueOrAddLiveIn(PoisonValue::get(Phi->getType()));
PhiRecipe = new VPCSAHeaderPHIRecipe(Phi, InitData);
State->setPhiRecipe(cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
} else {
llvm_unreachable(
"can only widen reductions, fixed-order recurrences, and CSAs here");
Expand Down Expand Up @@ -8752,13 +8756,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return CSADescriptor::isCSASelect(CSA.second, SI);
});
if (CSADescIt != Legal->getCSAs().end()) {
PHINode *CSAPhi = CSADescIt->first;
VPCSAState *State = Plan.getCSAStates().find(CSAPhi)->second;
VPValue *VPDataPhi = State->getPhiRecipe();
auto *R = new VPCSADataUpdateRecipe(
SI, {VPDataPhi, Operands[0], Operands[1], Operands[2]});
State->setDataUpdate(R);
return R;
for (VPRecipeBase &R :
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
if (PhiR->getUnderlyingInstr() == CSADescIt->first) {
auto *R = new VPCSADataUpdateRecipe(
SI, {PhiR, Operands[0], Operands[1], Operands[2]});
PhiR->setDataUpdate(R);
return R;
}
}
}
}

return new VPWidenSelectRecipe(
Expand All @@ -8773,44 +8781,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return tryToWiden(Instr, Operands, VPBB);
}

/// Add CSA Recipes that can occur before each instruction in the input IR
/// is processed and introduced into VPlan.
static void
addCSAPreprocessRecipes(const LoopVectorizationLegality::CSAList &CSAs,
Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
VPlan &Plan, VPRecipeBuilder &Builder) {

// Don't build full CSA for VF=ElementCount::getFixed(1)
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return VF.isScalar(); }, Range);

for (const auto &CSA : CSAs) {
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// Scalar VF builds the scalar version of the loop. In that case,
// no maintenence of mask nor extraction in middle block is needed.
if (IsScalarVF) {
VPCSAState *S = new VPCSAState(VPInitScalar);
Plan.addCSAState(CSA.first, S);
continue;
}

VPBuilder PHB(PreheaderVPBB);
auto *VPInitMask = Builder.getVPValueOrAddLiveIn(
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
auto *VPInitData =
Builder.getVPValueOrAddLiveIn(PoisonValue::get(CSA.first->getType()));

VPBuilder HB(HeaderVPBB);
auto *VPMaskPhi = HB.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");

auto *S = new VPCSAState(VPInitScalar, VPInitData, VPMaskPhi);
Plan.addCSAState(CSA.first, S);
}
}

/// Add CSA Recipes that must occur after each instruction in the input IR
/// is processed and introduced into VPlan.
static void
Expand All @@ -8823,60 +8793,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
[&](ElementCount VF) { return VF.isScalar(); }, Range))
return;

VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
for (const auto &CSA : CSAs) {
VPCSAState *CSAState = Plan.getCSAStates().find(CSA.first)->second;
VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate();
// Build the MaskPhi recipe.
auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn(
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
VPBuilder B;
B.setInsertPoint(Header, Header->getFirstNonPhi());
auto *VPMaskPhi = B.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
B.clearInsertionPoint();

assert(VPDataUpdate &&
"VPDataUpdate must have been introduced prior to postprocess");
assert(CSA.second.getCond() &&
"CSADescriptor must know how to describe the condition");
auto GetVPValue = [&](Value *I) {
return RecipeBuilder.getRecipe(cast<Instruction>(I))->getVPSingleValue();
};
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
VPValue *VPInitScalar = CSAState->getVPInitScalar();
VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
cast<VPCSAHeaderPHIRecipe>(GetVPValue(CSA.first))->getVPNewData());

// The CSA optimization wants to use a condition such that when it is
// true, a new value is assigned. However, it is possible that a true lane
// in WidenedCond corresponds to selection of the initial value instead.
// In that case, we must use the negation of WidenedCond.
// i.e. select cond new_val old_val versus select cond.not old_val new_val
assert(CSA.second.getCond() &&
"CSADescriptor must know how to describe the condition");
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
VPValue *CondToUse = WidenedCond;
VPBuilder B;
if (cast<SelectInst>(CSA.second.getAssignment())->getTrueValue() ==
CSA.first) {
auto *VPNotCond = B.createNot(WidenedCond, DL);
VPNotCond->insertBefore(
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
VPNotCond->insertBefore(VPDataUpdate);
CondToUse = VPNotCond;
}

auto *VPAnyActive =
B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
VPAnyActive->insertBefore(
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
auto *VPAnyActive = B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
VPAnyActive->insertBefore(VPDataUpdate);

auto *VPMaskSel = B.createCSAMaskSel(CondToUse, CSAState->getVPMaskPhi(),
VPAnyActive, DL, "csa.mask.sel");
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, VPMaskPhi, VPAnyActive, DL,
"csa.mask.sel");
VPMaskSel->insertAfter(VPAnyActive);

VPDataUpdate->setVPNewMaskAndVPAnyActive(VPMaskSel, VPAnyActive);
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
SmallVector<PHINode *> PhiToFix;
for (User *U : VPDataUpdate->getUnderlyingValue()->users())
if (auto *Phi = dyn_cast<PHINode>(U);
Phi && Phi->getParent() == OrigLoop->getUniqueExitBlock())
PhiToFix.emplace_back(Phi);
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate});

new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate},
PhiToFix);
MiddleVPBB->insert(ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi());

// Update CSAState with new recipes
CSAState->setExtractScalarRecipe(ExtractScalarRecipe);
CSAState->setVPAnyActive(VPAnyActive);

// Add live out for the CSA. We should be in LCSSA, so we are looking for
// Phi users in the unique exit block of the original updated value.
BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock();
assert(OrigExit && "Expected a single exit block");
for (User *U :VPDataUpdate->getUnderlyingValue()->users())
if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent() == OrigExit)
Plan.addLiveOut(Phi, ExtractScalarRecipe);
}
}

Expand Down Expand Up @@ -9194,11 +9161,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {

VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);

addCSAPreprocessRecipes(Legal->getCSAs(), OrigLoop, Plan->getPreheader(),
Plan->getVectorLoopRegion()->getEntryBasicBlock(), DL,
Range, *Plan, RecipeBuilder);


// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,9 +856,6 @@ VPlan::~VPlan() {
delete VPV;
if (BackedgeTakenCount)
delete BackedgeTakenCount;

for (std::pair<PHINode *, VPCSAState *> &S : CSAStates)
delete S.second;
}

VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {
Expand Down
75 changes: 13 additions & 62 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,53 +231,6 @@ class VPLane {
}
};

class VPInstruction;
class VPCSAHeaderPHIRecipe;
class VPCSADataUpdateRecipe;
class VPCSAExtractScalarRecipe;

/// VPCSAState holds information required to vectorize a conditional scalar
/// assignment.
class VPCSAState {
VPValue *VPInitScalar = nullptr;
VPValue *VPInitData = nullptr;
VPInstruction *VPMaskPhi = nullptr;
VPInstruction *VPAnyActive = nullptr;
VPCSAHeaderPHIRecipe *VPPhiRecipe = nullptr;
VPCSADataUpdateRecipe *VPDataUpdate = nullptr;
VPCSAExtractScalarRecipe *VPExtractScalar = nullptr;

public:
VPCSAState(VPValue *VPInitScalar, VPValue *InitData,
VPInstruction *MaskPhi)
: VPInitScalar(VPInitScalar), VPInitData(InitData), VPMaskPhi(MaskPhi) {}

VPCSAState(VPValue *VPInitScalar) : VPInitScalar(VPInitScalar) {}

VPValue *getVPInitScalar() const { return VPInitScalar; }

VPValue *getVPInitData() const { return VPInitData; }

VPInstruction *getVPMaskPhi() const { return VPMaskPhi; }

void setVPAnyActive(VPInstruction *AnyActive) { VPAnyActive = AnyActive; }
VPInstruction *getVPAnyActive() { return VPAnyActive; }

VPCSAHeaderPHIRecipe *getPhiRecipe() const { return VPPhiRecipe; }

void setPhiRecipe(VPCSAHeaderPHIRecipe *R) { VPPhiRecipe = R; }

VPCSADataUpdateRecipe *getDataUpdate() const { return VPDataUpdate; }
void setDataUpdate(VPCSADataUpdateRecipe *R) { VPDataUpdate = R; }

void setExtractScalarRecipe(VPCSAExtractScalarRecipe *R) {
VPExtractScalar = R;
}
VPCSAExtractScalarRecipe *getExtractScalarRecipe() const {
return VPExtractScalar;
}
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
Expand Down Expand Up @@ -2893,7 +2846,10 @@ class VPCSAHeaderPHIRecipe final : public VPHeaderPHIRecipe {
}

VPValue *getVPInitData() { return getOperand(0); }
VPValue *getVPNewData() { return getOperand(1); }

VPValue *NewData = nullptr;
void setDataUpdate(VPValue *V) { NewData = V; }
VPValue *getVPNewData() { return NewData; }
};

class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
Expand Down Expand Up @@ -2947,15 +2903,19 @@ class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
};

class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
SmallVector<PHINode *> PhisToFix;

public:
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands)
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands) {}
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands,
SmallVector<PHINode *> PhisToFix)
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands),
PhisToFix(PhisToFix) {}

~VPCSAExtractScalarRecipe() override = default;

VPCSAExtractScalarRecipe *clone() override {
SmallVector<VPValue *> Ops(operands());
return new VPCSAExtractScalarRecipe(Ops);
return new VPCSAExtractScalarRecipe(Ops, PhisToFix);
}

void execute(VPTransformState &State) override;
Expand All @@ -2969,6 +2929,8 @@ class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
VPSlotTracker &SlotTracker) const override;
#endif

VP_CLASSOF_IMPL(VPDef::VPCSAExtractScalarSC)

VPValue *getVPInitScalar() const { return getOperand(0); }
VPValue *getVPMaskSel() const { return getOperand(1); }
VPValue *getVPDataSel() const { return getOperand(2); }
Expand Down Expand Up @@ -3948,11 +3910,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;

/// Values used outside the plan. It contains live-outs that need fixing. Any
/// live-out that is fixed outside VPlan needs to be removed. The remaining
/// live-outs are fixed via VPLiveOut::fixPhi.
MapVector<PHINode *, VPLiveOut *> LiveOuts;

/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
Expand Down Expand Up @@ -4003,12 +3960,6 @@ class VPlan {
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop);

void addCSAState(PHINode *Phi, VPCSAState *S) { CSAStates.insert({Phi, S}); }

MapVector<PHINode *, VPCSAState *> const &getCSAStates() const {
return CSAStates;
}

/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
Value *CanonicalIVStartValue, VPTransformState &State);
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
ConstantInt::get(WidenedCond->getType()->getScalarType(), 0);
Value *AnyActive = State.Builder.CreateIntrinsic(
WidenedCond->getType()->getScalarType(), Intrinsic::vp_reduce_or,
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr,
"any.active");
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr, "any.active");
return AnyActive;
}
case VPInstruction::CSAVLPhi: {
Expand Down Expand Up @@ -2502,6 +2501,7 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
"csa.data.sel");

DataPhi->addIncoming(DataSel, State.CFG.PrevBB);

State.set(this, DataSel);
}

Expand Down Expand Up @@ -2592,6 +2592,10 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
Value *LastIdxGEZero = State.Builder.CreateICmpSGE(LastIdx, Zero);
Value *ChooseFromVecOrInit =
State.Builder.CreateSelect(LastIdxGEZero, ExtractFromVec, InitScalar);

for (PHINode *Phi : PhisToFix)
Phi->addIncoming(ChooseFromVecOrInit, State.CFG.ExitBB);

State.set(this, ChooseFromVecOrInit, /*IsScalar=*/true);
}

Expand Down
Loading

0 comments on commit e55606a

Please sign in to comment.