Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIE NFC] Postpipeliner cleanups and refactorings #253

Open
wants to merge 9 commits into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ static cl::opt<int> PostPipelinerMaxII(
"aie-postpipeliner-maxii", cl::init(40),
cl::desc("[AIE] Maximum II to be tried in the post-ra pipeliner"));

// Upper bound on the number of II values the post-ra pipeliner attempts for a
// block. The count starts at 1 for the first attempt at ResMII, so this limit
// is relative to the starting II, whereas "aie-postpipeliner-maxii" is an
// absolute cap on the II itself. Both limits must hold to try a larger II.
static cl::opt<int> PostPipelinerMaxTryII(
"aie-postpipeliner-maxtry-ii", cl::init(10),
cl::desc("[AIE] Maximum II steps to be tried in the post-ra pipeliner"));

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rationale: It is difficult to give a useful absolute value. For a small ResMII we would keep trying way beyond usefulness; for a larger ResMII it may not be enough. A relative amount works better.

namespace llvm::AIE {

void dumpInterBlock(const InterBlockEdges &Edges) {
Expand Down Expand Up @@ -600,6 +604,7 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
auto &PostSWP = BS.getPostSWP();
if (PostSWP.canAccept(*BS.TheBlock)) {
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
BS.FixPoint.IITries = 1;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also makes it simpler to use a solver only on the first few tries.

return BS.FixPoint.Stage = SchedulingStage::Pipelining;
}
}
Expand All @@ -614,7 +619,8 @@ SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) {

// Otherwise try a larger II.
// We cut off at larger IIs to prevent excessive compilation time.
if (++BS.FixPoint.II <= PostPipelinerMaxII) {
if (++BS.FixPoint.II <= PostPipelinerMaxII &&
++BS.FixPoint.IITries <= PostPipelinerMaxTryII) {
return BS.FixPoint.Stage = SchedulingStage::Pipelining;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ class FixedpointState {
int ResourceMargin = 0;
// The II of the modulo schedule we are trying.
int II = 0;
// The number of II values tried so far, counting the first try at ResMII
int IITries = 0;
// Results from the convergence test
int MaxLatencyExtent = 0;
int MaxResourceExtent = 0;
Expand Down
180 changes: 122 additions & 58 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Support/MathExtras.h"
#include <limits>
#include <string>

#define DEBUG_TYPE "postpipeliner"
#define DEBUG_SUMMARY(X) DEBUG_WITH_TYPE("postpipeliner-summary", X)
Expand Down Expand Up @@ -200,6 +202,11 @@ void PostPipeliner::computeForward() {
for (int K = 0; K < NInstr; K++) {
auto &Me = Info[K];
SUnit &SU = DAG->SUnits[K];
Me.Slots = getSlotCounts(*SU.getInstr(), TII);
// Accumulate the slots of Me and all data predecessors.
SlotCounts Slots(Me.Slots);
int PredEarliest = std::numeric_limits<int>::max();
int Count = 0;
for (auto &Dep : SU.Preds) {
if (Dep.getKind() != SDep::Data) {
continue;
Expand All @@ -208,10 +215,18 @@ void PostPipeliner::computeForward() {
assert(P < K);
Me.Ancestors.insert(P);
auto &Pred = Info[P];
Slots += Pred.Slots;
Count++;
PredEarliest = std::min(PredEarliest, Pred.Earliest);
for (int Anc : Pred.Ancestors) {
Me.Ancestors.insert(Anc);
}
}
// When we need more slots than we have data predecessors, we have local
// resource contention that we can safely account for in Earliest.
if (Count > 0 && Slots.max() > Count) {
Me.Earliest = std::max(Me.Earliest, PredEarliest + Slots.max() - 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking: Do we have something similar to bias Latest based on successors? I think yes, but my memory is blurry.

}
for (auto &Dep : SU.Succs) {
auto *Succ = Dep.getSUnit();
if (Succ->isBoundaryNode()) {
Expand All @@ -221,7 +236,6 @@ void PostPipeliner::computeForward() {
const int NewEarliest = Me.Earliest + Dep.getSignedLatency();
SInfo.Earliest = std::max(SInfo.Earliest, NewEarliest);
}
Me.Slots = getSlotCounts(*SU.getInstr(), TII);
}
}

Expand Down Expand Up @@ -323,11 +337,15 @@ bool PostPipeliner::computeLoopCarriedParameters() {
}

// Save the static values for ease of reset
for (auto &N : Info) {
for (auto &N : Info.Nodes) {
N.StaticEarliest = N.Earliest;
N.StaticLatest = N.Latest;
}
return true;
Info.compute();

// If no node can be scheduled in cycle 0, we must have a circuit that
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: If no node can be scheduled in cycle 0 after accounting for LCDs, ...

// is longer than II
return Info.MinEarliest == 0;
}

int PostPipeliner::computeMinScheduleLength() const {
Expand All @@ -343,24 +361,23 @@ int PostPipeliner::computeMinScheduleLength() const {
return MinLength;
}

void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info,
ScheduleDAGInstrs *DAG) {
void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
dbgs() << "digraph {\n";

for (int K = 0; K < NInstr; K++) {
for (int K = 0; K < Info.NInstr; K++) {
auto &SU = DAG->SUnits[K];
for (auto &Dep : SU.Succs) {
auto *Succ = Dep.getSUnit();
int S = Succ->NodeNum;
if (S % NInstr == K) {
if (S % Info.NInstr == K || Succ->isBoundaryNode()) {
continue;
}

dbgs() << "\tSU" << K << " -> "
<< "SU" << S;

if (S >= NInstr) {
dbgs() << "_" << S % NInstr;
if (S >= Info.NInstr) {
dbgs() << "_" << S % Info.NInstr;
}
if (Dep.getKind() == SDep::Data) {
dbgs() << " [color=red] ";
Expand All @@ -380,6 +397,25 @@ void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info,
dbgs() << "}\n";
}

/// Print a chart of the scheduling interval of each instruction to dbgs().
///
/// After an "Intervals:" header, one row is printed for each of the first
/// Info.NInstr nodes. A row starts with the label "SU<K>". The column counter
/// then runs from (label length - 6) up to, but not including, MinLength, so
/// labels shorter than six characters are followed by extra columns that line
/// the rows up. A '|' is printed just before column 0. Each column I is drawn
/// as '*' when Earliest <= I <= MinLength + Latest for that node, and as ' '
/// otherwise.
///
/// \param Info      Schedule information that supplies NInstr and the
///                  per-node Earliest and Latest values.
/// \param MinLength Number of cycles to draw; Latest is added to it to obtain
///                  the last cycle of a node's interval.
void dumpIntervals(const ScheduleInfo &Info, int MinLength) {
  // Width that the "SU<K>" label is padded to.
  constexpr int LabelWidth = 6;

  dbgs() << "Intervals:\n";
  for (int K = 0; K < Info.NInstr; K++) {
    const std::string Head = "SU" + std::to_string(K);
    dbgs() << Head;

    const auto &Node = Info[K];
    // Head.length() is unsigned; subtracting in size_t would wrap around for
    // short labels, so convert to int before computing the start column.
    const int FirstColumn = static_cast<int>(Head.length()) - LabelWidth;
    for (int I = FirstColumn; I < MinLength; I++) {
      if (I == 0) {
        dbgs() << "|";
      }
      const bool InInterval =
          I >= Node.Earliest && I <= MinLength + Node.Latest;
      dbgs() << (InInterval ? "*" : " ");
    }
    dbgs() << "\n";
  }
}

int PostPipeliner::mostUrgent(PostPipelinerStrategy &Strategy) {
assert(FirstUnscheduled <= LastUnscheduled);
while (Info[FirstUnscheduled].Scheduled) {
Expand Down Expand Up @@ -476,7 +512,7 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) {
}

namespace {
void dumpEarliestChain(const std::vector<NodeInfo> &Info, int N) {
void dumpEarliestChain(const ScheduleInfo &Info, int N) {
auto Prev = Info[N].LastEarliestPusher;
if (Prev) {
dumpEarliestChain(Info, *Prev);
Expand Down Expand Up @@ -515,16 +551,20 @@ bool PostPipeliner::scheduleOtherIterations() {

class DefaultStrategy : public PostPipelinerStrategy {
public:
DefaultStrategy(ScheduleDAGMI &DAG, std::vector<NodeInfo> &Info,
int LatestBias)
DefaultStrategy(ScheduleDAGMI &DAG, ScheduleInfo &Info, int LatestBias)
: PostPipelinerStrategy(DAG, Info, LatestBias) {}
bool better(const SUnit &A, const SUnit &B) override {
return Info[A.NodeNum].Latest < Info[B.NodeNum].Latest;
}
};

class ConfigStrategy : public PostPipelinerStrategy {
protected:
int II = 0;

private:
bool TopDown = true;
bool Alternate = false;

public:
enum PriorityComponent {
Expand Down Expand Up @@ -552,6 +592,13 @@ class ConfigStrategy : public PostPipelinerStrategy {
}
return "Size - Illegal";
}
struct Configuration {
int ExtraStages = 0;
bool TopDown = true;
bool Alternate = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to document those fields, especially Alternate

int Runs = 0;
ArrayRef<PriorityComponent> Components;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not own the list of components? Does that help a lot with memory usage?

};

private:
std::string Name;
Expand Down Expand Up @@ -597,6 +644,12 @@ class ConfigStrategy : public PostPipelinerStrategy {
return false;
}

// Earliest cycle currently recorded for node N.
int earliest(const SUnit &N) override {
  const auto &Node = Info[N.NodeNum];
  return Node.Earliest;
}

// Latest cycle currently recorded for node N, shifted by LatestBias.
int latest(const SUnit &N) override {
  const auto &Node = Info[N.NodeNum];
  return Node.Latest + LatestBias;
}

void selected(const SUnit &N) override {
// Promote the critical path
NodeInfo *Pushed = &Info[N.NodeNum];
Expand Down Expand Up @@ -628,75 +681,71 @@ class ConfigStrategy : public PostPipelinerStrategy {
PredSiblingScheduled.insert(PDep.getSUnit()->NodeNum);
}
}
if (Alternate) {
TopDown = !TopDown;
}
}

public:
// Name of this strategy, as printed in the debug summary output.
std::string name() override {
  return Name;
}
ConfigStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info,
int Length, bool TopDown,
ConfigStrategy(ScheduleDAGInstrs &DAG, ScheduleInfo &Info, int Length, int II,
bool TopDown, bool Alternate,
ArrayRef<PriorityComponent> Components)
: PostPipelinerStrategy(DAG, Info, Length), TopDown(TopDown) {
Name = "Config_" + std::to_string(Length) + "_" + std::to_string(TopDown);
: PostPipelinerStrategy(DAG, Info, Length), II(II), TopDown(TopDown),
Alternate(Alternate) {
Name = "Config_" + std::to_string(Length) + "_" + std::to_string(TopDown) +
"_" + std::to_string(Alternate);
for (auto Comp : Components) {
Name += "_" + getPriorityName(Comp);
Priority.emplace_back(Comp);
}
}
};

static const struct {
int ExtraStages;
bool TopDown;
bool Rerun;
ConfigStrategy::PriorityComponent Components[3];
} Strategies[] = {
// Named priority-component lists that the rows of the Strategies table refer
// to. NOTE(review): Configuration::Components is an ArrayRef, which presumably
// does not own its elements, so these arrays have to outlive the table.
static const ConfigStrategy::PriorityComponent
NodeNum[] = {ConfigStrategy::NodeNum},
Latest[] = {ConfigStrategy::Latest},
Critical[] = {ConfigStrategy::Critical},
CriticalLCDLatest[] = {ConfigStrategy::Critical, ConfigStrategy::LCDLatest};

static const ConfigStrategy::Configuration Strategies[] = {
// Loosely speaking, a lower value of the first parameter targets
// a lower stage count, which benefits code size.
// Rerun is only useful for heuristics that use it, e.g. Critical
{1, true, false, {ConfigStrategy::NodeNum}},
{1, true, false, {ConfigStrategy::Latest}},
{1, true, true, {ConfigStrategy::Critical}},
{1, true, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
{0, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
{1, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
// Runs>1 is only useful for heuristics that use it, e.g. Critical
// {ExtraStages, TopDown, Alternate, Runs, Components}
{1, true, false, 1, NodeNum},
{1, true, false, 1, Latest},
{1, true, false, 2, Critical},
{1, true, false, 2, CriticalLCDLatest},
{0, false, false, 2, CriticalLCDLatest},
{1, false, false, 2, CriticalLCDLatest},
// This is pure bottom up
{1, false, false, {ConfigStrategy::NodeNum}},
{1, false, false, 1, NodeNum},
};

bool PostPipeliner::tryHeuristics() {
int MinLength = computeMinScheduleLength();

DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");

int HeuristicIndex = 0;
for (auto &[ExtraStages, TopDown, Rerun, Components] : Strategies) {
for (const auto &Config : Strategies) {
if (Heuristic >= 0 && Heuristic != HeuristicIndex++) {
continue;
}
ConfigStrategy S(*DAG, Info, MinLength + ExtraStages * II, TopDown,
Components);
ConfigStrategy S(*DAG, Info, MinLength + Config.ExtraStages * II, II,
Config.TopDown, Config.Alternate, Config.Components);
resetSchedule(/*FullReset=*/true);
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() << "\n");
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " found II=" << II
for (int Run = 0; Run < Config.Runs; Run++) {
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() << " run=" << Run
<< "\n");
return true;
}

DEBUG_SUMMARY(dbgs() << " failed\n");
if (!Rerun) {
continue;
}

// Rerun with dynamic information retained
resetSchedule(/*FullReset=*/false);
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name()
<< " with critical path");
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
DEBUG_SUMMARY(dbgs() << " found II=" << II << "\n");
return true;
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " run=" << Run
<< " found II=" << II << "\n");
return true;
}
resetSchedule(/*FullReset=*/false);
}
DEBUG_SUMMARY(dbgs() << " failed\n");
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " failed\n");
}
DEBUG_SUMMARY(dbgs() << "=== II=" << II << " Failed ===\n");
return false;
Expand All @@ -707,7 +756,7 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) {
assert(NTotalInstrs % NInstr == 0);
NCopies = NTotalInstrs / NInstr;
if (NCopies == 1) {
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible\n");
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible - Too few stages\n");
return false;
}
II = InitiationInterval;
Expand All @@ -718,15 +767,19 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) {
Depth = NCopies * II + HR.getPipelineDepth();
Scoreboard.reset(Depth);

Info.clear();
Info.resize(NTotalInstrs);
Info.init(NInstr, NCopies);

LLVM_DEBUG(for (int I = 0; I < NInstr;
I++) { dbgs() << I << " " << *DAG->SUnits[I].getInstr(); });
LLVM_DEBUG(dumpGraph(NInstr, Info, DAG));
LLVM_DEBUG(dumpGraph(Info, DAG));

computeLoopCarriedParameters();
bool Feasible = computeLoopCarriedParameters();
if (!Feasible) {
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible - RecMII\n");
return false;
}

LLVM_DEBUG(dumpIntervals(Info, computeMinScheduleLength()));
if (!tryHeuristics()) {
LLVM_DEBUG(dbgs() << "PostPipeliner: No schedule found\n");
return false;
Expand Down Expand Up @@ -838,4 +891,15 @@ void NodeInfo::reset(bool FullReset) {
}
}

/// Recompute the aggregate bounds over the first NInstr nodes.
///
/// - MinEarliest is the smallest Earliest of any node. A value other than 0
///   therefore shows that no node can be placed in cycle 0.
/// - MaxEarliest is the largest Earliest of any node, but never below 0.
/// - MinLatest is the smallest Latest of any node, but never above -1.
///
/// When there are no nodes, the results are 0, 0 and -1 respectively.
void ScheduleInfo::compute() {
  MinEarliest = 0;
  MaxEarliest = 0;
  MinLatest = -1;
  if (NInstr <= 0) {
    return;
  }

  // Seed the minimum from a real node. Starting from 0 would clamp the result
  // at 0 and hide the case where every node has been pushed past cycle 0.
  MinEarliest = Nodes[0].Earliest;
  for (int K = 0; K < NInstr; K++) {
    const auto &Node = Nodes[K];
    MinEarliest = std::min(MinEarliest, Node.Earliest);
    MaxEarliest = std::max(MaxEarliest, Node.Earliest);
    MinLatest = std::min(MinLatest, Node.Latest);
  }
}

} // namespace llvm::AIE
Loading