-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AIE NFC] Postpipeliner cleanups and refactorings #253
base: aie-public
Are you sure you want to change the base?
Changes from 8 commits
8baa1cf
03951f0
3cd1db1
37d1a95
505d73f
11bc08e
6636a0a
7f4c316
95dbddf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,10 @@ static cl::opt<int> PostPipelinerMaxII( | |
"aie-postpipeliner-maxii", cl::init(40), | ||
cl::desc("[AIE] Maximum II to be tried in the post-ra pipeliner")); | ||
|
||
static cl::opt<int> PostPipelinerMaxTryII( | ||
"aie-postpipeliner-maxtry-ii", cl::init(10), | ||
cl::desc("[AIE] Maximum II steps to be tried in the post-ra pipeliner")); | ||
|
||
namespace llvm::AIE { | ||
|
||
void dumpInterBlock(const InterBlockEdges &Edges) { | ||
|
@@ -600,6 +604,7 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | |
auto &PostSWP = BS.getPostSWP(); | ||
if (PostSWP.canAccept(*BS.TheBlock)) { | ||
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock); | ||
BS.FixPoint.IITries = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This also simplifies using a solver only on the first few tries |
||
return BS.FixPoint.Stage = SchedulingStage::Pipelining; | ||
} | ||
} | ||
|
@@ -614,7 +619,8 @@ SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) { | |
|
||
// Otherwise try a larger II. | ||
// We cut off at larger IIs to prevent excessive compilation time. | ||
if (++BS.FixPoint.II <= PostPipelinerMaxII) { | ||
if (++BS.FixPoint.II <= PostPipelinerMaxII && | ||
++BS.FixPoint.IITries <= PostPipelinerMaxTryII) { | ||
return BS.FixPoint.Stage = SchedulingStage::Pipelining; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,8 @@ | |
#include "llvm/CodeGen/ScheduleDAG.h" | ||
#include "llvm/CodeGen/ScheduleDAGInstrs.h" | ||
#include "llvm/Support/MathExtras.h" | ||
#include <limits> | ||
#include <string> | ||
|
||
#define DEBUG_TYPE "postpipeliner" | ||
#define DEBUG_SUMMARY(X) DEBUG_WITH_TYPE("postpipeliner-summary", X) | ||
|
@@ -200,6 +202,11 @@ void PostPipeliner::computeForward() { | |
for (int K = 0; K < NInstr; K++) { | ||
auto &Me = Info[K]; | ||
SUnit &SU = DAG->SUnits[K]; | ||
Me.Slots = getSlotCounts(*SU.getInstr(), TII); | ||
// Accumulate the slots of Me and all data predecessors. | ||
SlotCounts Slots(Me.Slots); | ||
int PredEarliest = std::numeric_limits<int>::max(); | ||
int Count = 0; | ||
for (auto &Dep : SU.Preds) { | ||
if (Dep.getKind() != SDep::Data) { | ||
continue; | ||
|
@@ -208,10 +215,18 @@ void PostPipeliner::computeForward() { | |
assert(P < K); | ||
Me.Ancestors.insert(P); | ||
auto &Pred = Info[P]; | ||
Slots += Pred.Slots; | ||
Count++; | ||
PredEarliest = std::min(PredEarliest, Pred.Earliest); | ||
for (int Anc : Pred.Ancestors) { | ||
Me.Ancestors.insert(Anc); | ||
} | ||
} | ||
// When we need more slots than we have data predecessors, we have local | ||
// resource contention that we can safely account for in Earliest. | ||
if (Count > 0 && Slots.max() > Count) { | ||
Me.Earliest = std::max(Me.Earliest, PredEarliest + Slots.max() - 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checking: Do we have something similar to bias |
||
} | ||
for (auto &Dep : SU.Succs) { | ||
auto *Succ = Dep.getSUnit(); | ||
if (Succ->isBoundaryNode()) { | ||
|
@@ -221,7 +236,6 @@ void PostPipeliner::computeForward() { | |
const int NewEarliest = Me.Earliest + Dep.getSignedLatency(); | ||
SInfo.Earliest = std::max(SInfo.Earliest, NewEarliest); | ||
} | ||
Me.Slots = getSlotCounts(*SU.getInstr(), TII); | ||
} | ||
} | ||
|
||
|
@@ -323,11 +337,15 @@ bool PostPipeliner::computeLoopCarriedParameters() { | |
} | ||
|
||
// Save the static values for ease of reset | ||
for (auto &N : Info) { | ||
for (auto &N : Info.Nodes) { | ||
N.StaticEarliest = N.Earliest; | ||
N.StaticLatest = N.Latest; | ||
} | ||
return true; | ||
Info.compute(); | ||
|
||
// If no node can be scheduled in cycle 0, we must have a circuit that | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: If no node can be scheduled in cycle 0 after accounting for LCDs, ... |
||
// is longer than II | ||
return Info.MinEarliest == 0; | ||
} | ||
|
||
int PostPipeliner::computeMinScheduleLength() const { | ||
|
@@ -343,24 +361,23 @@ int PostPipeliner::computeMinScheduleLength() const { | |
return MinLength; | ||
} | ||
|
||
void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info, | ||
ScheduleDAGInstrs *DAG) { | ||
void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) { | ||
dbgs() << "digraph {\n"; | ||
|
||
for (int K = 0; K < NInstr; K++) { | ||
for (int K = 0; K < Info.NInstr; K++) { | ||
auto &SU = DAG->SUnits[K]; | ||
for (auto &Dep : SU.Succs) { | ||
auto *Succ = Dep.getSUnit(); | ||
int S = Succ->NodeNum; | ||
if (S % NInstr == K) { | ||
if (S % Info.NInstr == K || Succ->isBoundaryNode()) { | ||
continue; | ||
} | ||
|
||
dbgs() << "\tSU" << K << " -> " | ||
<< "SU" << S; | ||
|
||
if (S >= NInstr) { | ||
dbgs() << "_" << S % NInstr; | ||
if (S >= Info.NInstr) { | ||
dbgs() << "_" << S % Info.NInstr; | ||
} | ||
if (Dep.getKind() == SDep::Data) { | ||
dbgs() << " [color=red] "; | ||
|
@@ -380,6 +397,25 @@ void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info, | |
dbgs() << "}\n"; | ||
} | ||
|
||
// Debug helper: prints one text row per instruction showing the cycle | ||
// interval in which it may be placed. | ||
// | ||
// Output starts with an "Intervals:" header. Each of the first Info.NInstr | ||
// nodes then gets a row that begins with its label "SU<K>", followed by one | ||
// character per cycle up to (but excluding) MinLength: | ||
//   '*'  the cycle lies in [Earliest, MinLength + Latest] for that node | ||
//   ' '  the cycle lies outside that range | ||
// A '|' is emitted immediately before the column of cycle 0. | ||
// | ||
// Info      - per-node schedule data; only Earliest and Latest are read. | ||
// MinLength - number of cycles to draw; Latest is added to it to obtain the | ||
//             upper end of the interval. | ||
void dumpIntervals(const ScheduleInfo &Info, int MinLength) { | ||
dbgs() << "Intervals:\n"; | ||
for (int K = 0; K < Info.NInstr; K++) { | ||
std::string Head = "SU" + std::to_string(K); | ||
dbgs() << Head; | ||
// Start at a negative column so that label plus padding occupy six | ||
// characters, which lines up the '|' marker across rows for labels of up | ||
// to six characters. | ||
// NOTE(review): Head.length() is unsigned, so the subtraction wraps before | ||
// it is converted to int; this relies on that conversion producing the | ||
// expected negative value. | ||
for (int I = Head.length() - 6; I < MinLength; I++) { | ||
if (I == 0) { | ||
dbgs() << "|"; | ||
} | ||
// Padding columns (I < 0) go through the same test; they print as blanks | ||
// presumably because Earliest is never negative - TODO confirm. | ||
if (I >= Info[K].Earliest && I <= MinLength + Info[K].Latest) { | ||
dbgs() << "*"; | ||
} else { | ||
dbgs() << " "; | ||
} | ||
} | ||
dbgs() << "\n"; | ||
} | ||
} | ||
|
||
int PostPipeliner::mostUrgent(PostPipelinerStrategy &Strategy) { | ||
assert(FirstUnscheduled <= LastUnscheduled); | ||
while (Info[FirstUnscheduled].Scheduled) { | ||
|
@@ -476,7 +512,7 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) { | |
} | ||
|
||
namespace { | ||
void dumpEarliestChain(const std::vector<NodeInfo> &Info, int N) { | ||
void dumpEarliestChain(const ScheduleInfo &Info, int N) { | ||
auto Prev = Info[N].LastEarliestPusher; | ||
if (Prev) { | ||
dumpEarliestChain(Info, *Prev); | ||
|
@@ -515,16 +551,20 @@ bool PostPipeliner::scheduleOtherIterations() { | |
|
||
class DefaultStrategy : public PostPipelinerStrategy { | ||
public: | ||
DefaultStrategy(ScheduleDAGMI &DAG, std::vector<NodeInfo> &Info, | ||
int LatestBias) | ||
DefaultStrategy(ScheduleDAGMI &DAG, ScheduleInfo &Info, int LatestBias) | ||
: PostPipelinerStrategy(DAG, Info, LatestBias) {} | ||
bool better(const SUnit &A, const SUnit &B) override { | ||
return Info[A.NodeNum].Latest < Info[B.NodeNum].Latest; | ||
} | ||
}; | ||
|
||
class ConfigStrategy : public PostPipelinerStrategy { | ||
protected: | ||
int II = 0; | ||
|
||
private: | ||
bool TopDown = true; | ||
bool Alternate = false; | ||
|
||
public: | ||
enum PriorityComponent { | ||
|
@@ -552,6 +592,13 @@ class ConfigStrategy : public PostPipelinerStrategy { | |
} | ||
return "Size - Illegal"; | ||
} | ||
struct Configuration { | ||
int ExtraStages = 0; | ||
bool TopDown = true; | ||
bool Alternate = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be nice to document those fields, especially |
||
int Runs = 0; | ||
ArrayRef<PriorityComponent> Components; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not own the list of components? Does that help a lot with memory usage? |
||
}; | ||
|
||
private: | ||
std::string Name; | ||
|
@@ -597,6 +644,12 @@ class ConfigStrategy : public PostPipelinerStrategy { | |
return false; | ||
} | ||
|
||
int earliest(const SUnit &N) override { return Info[N.NodeNum].Earliest; } | ||
|
||
int latest(const SUnit &N) override { | ||
return Info[N.NodeNum].Latest + LatestBias; | ||
} | ||
|
||
void selected(const SUnit &N) override { | ||
// Promote the critical path | ||
NodeInfo *Pushed = &Info[N.NodeNum]; | ||
|
@@ -628,75 +681,71 @@ class ConfigStrategy : public PostPipelinerStrategy { | |
PredSiblingScheduled.insert(PDep.getSUnit()->NodeNum); | ||
} | ||
} | ||
if (Alternate) { | ||
TopDown = !TopDown; | ||
} | ||
} | ||
|
||
public: | ||
std::string name() override { return Name; } | ||
ConfigStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info, | ||
int Length, bool TopDown, | ||
ConfigStrategy(ScheduleDAGInstrs &DAG, ScheduleInfo &Info, int Length, int II, | ||
bool TopDown, bool Alternate, | ||
ArrayRef<PriorityComponent> Components) | ||
: PostPipelinerStrategy(DAG, Info, Length), TopDown(TopDown) { | ||
Name = "Config_" + std::to_string(Length) + "_" + std::to_string(TopDown); | ||
: PostPipelinerStrategy(DAG, Info, Length), II(II), TopDown(TopDown), | ||
Alternate(Alternate) { | ||
Name = "Config_" + std::to_string(Length) + "_" + std::to_string(TopDown) + | ||
"_" + std::to_string(Alternate); | ||
for (auto Comp : Components) { | ||
Name += "_" + getPriorityName(Comp); | ||
Priority.emplace_back(Comp); | ||
} | ||
} | ||
}; | ||
|
||
static const struct { | ||
int ExtraStages; | ||
bool TopDown; | ||
bool Rerun; | ||
ConfigStrategy::PriorityComponent Components[3]; | ||
} Strategies[] = { | ||
static const ConfigStrategy::PriorityComponent | ||
NodeNum[] = {ConfigStrategy::NodeNum}, | ||
Latest[] = {ConfigStrategy::Latest}, | ||
Critical[] = {ConfigStrategy::Critical}, | ||
CriticalLCDLatest[] = {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}; | ||
|
||
static const ConfigStrategy::Configuration Strategies[] = { | ||
// Loosely speaking, a lower value of the first parameter targets | ||
// a lower stage count, which benefits code size. | ||
// Rerurn is only useful for heuristics that use it, e.g. Critical | ||
{1, true, false, {ConfigStrategy::NodeNum}}, | ||
{1, true, false, {ConfigStrategy::Latest}}, | ||
{1, true, true, {ConfigStrategy::Critical}}, | ||
{1, true, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}}, | ||
{0, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}}, | ||
{1, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}}, | ||
// Runs>1 is only useful for heuristics that use it, e.g. Critical | ||
// {ExtraStages, TopDown, Alternate, Runs, Components} | ||
{1, true, false, 1, NodeNum}, | ||
{1, true, false, 1, Latest}, | ||
{1, true, false, 2, Critical}, | ||
{1, true, false, 2, CriticalLCDLatest}, | ||
{0, false, false, 2, CriticalLCDLatest}, | ||
{1, false, false, 2, CriticalLCDLatest}, | ||
// This is pure bottom up | ||
{1, false, false, {ConfigStrategy::NodeNum}}, | ||
{1, false, false, 1, NodeNum}, | ||
}; | ||
|
||
bool PostPipeliner::tryHeuristics() { | ||
int MinLength = computeMinScheduleLength(); | ||
|
||
DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n"); | ||
|
||
int HeuristicIndex = 0; | ||
for (auto &[ExtraStages, TopDown, Rerun, Components] : Strategies) { | ||
for (const auto &Config : Strategies) { | ||
if (Heuristic >= 0 && Heuristic != HeuristicIndex++) { | ||
continue; | ||
} | ||
ConfigStrategy S(*DAG, Info, MinLength + ExtraStages * II, TopDown, | ||
Components); | ||
ConfigStrategy S(*DAG, Info, MinLength + Config.ExtraStages * II, II, | ||
Config.TopDown, Config.Alternate, Config.Components); | ||
resetSchedule(/*FullReset=*/true); | ||
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() << "\n"); | ||
if (scheduleFirstIteration(S) && scheduleOtherIterations()) { | ||
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " found II=" << II | ||
for (int Run = 0; Run < Config.Runs; Run++) { | ||
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() << " run=" << Run | ||
<< "\n"); | ||
return true; | ||
} | ||
|
||
DEBUG_SUMMARY(dbgs() << " failed\n"); | ||
if (!Rerun) { | ||
continue; | ||
} | ||
|
||
// Rerun with dynamic information retained | ||
resetSchedule(/*FullReset=*/false); | ||
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() | ||
<< " with critical path"); | ||
if (scheduleFirstIteration(S) && scheduleOtherIterations()) { | ||
DEBUG_SUMMARY(dbgs() << " found II=" << II << "\n"); | ||
return true; | ||
if (scheduleFirstIteration(S) && scheduleOtherIterations()) { | ||
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " run=" << Run | ||
<< " found II=" << II << "\n"); | ||
return true; | ||
} | ||
resetSchedule(/*FullReset=*/false); | ||
} | ||
DEBUG_SUMMARY(dbgs() << " failed\n"); | ||
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " failed\n"); | ||
} | ||
DEBUG_SUMMARY(dbgs() << "=== II=" << II << " Failed ===\n"); | ||
return false; | ||
|
@@ -707,7 +756,7 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) { | |
assert(NTotalInstrs % NInstr == 0); | ||
NCopies = NTotalInstrs / NInstr; | ||
if (NCopies == 1) { | ||
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible\n"); | ||
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible - Too few stages\n"); | ||
return false; | ||
} | ||
II = InitiationInterval; | ||
|
@@ -718,15 +767,19 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) { | |
Depth = NCopies * II + HR.getPipelineDepth(); | ||
Scoreboard.reset(Depth); | ||
|
||
Info.clear(); | ||
Info.resize(NTotalInstrs); | ||
Info.init(NInstr, NCopies); | ||
|
||
LLVM_DEBUG(for (int I = 0; I < NInstr; | ||
I++) { dbgs() << I << " " << *DAG->SUnits[I].getInstr(); }); | ||
LLVM_DEBUG(dumpGraph(NInstr, Info, DAG)); | ||
LLVM_DEBUG(dumpGraph(Info, DAG)); | ||
|
||
computeLoopCarriedParameters(); | ||
bool Feasible = computeLoopCarriedParameters(); | ||
if (!Feasible) { | ||
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible - RecMII\n"); | ||
return false; | ||
} | ||
|
||
LLVM_DEBUG(dumpIntervals(Info, computeMinScheduleLength())); | ||
if (!tryHeuristics()) { | ||
LLVM_DEBUG(dbgs() << "PostPipeliner: No schedule found\n"); | ||
return false; | ||
|
@@ -838,4 +891,15 @@ void NodeInfo::reset(bool FullReset) { | |
} | ||
} | ||
|
||
// Recomputes the summary values MinEarliest, MaxEarliest and MinLatest from | ||
// the Earliest/Latest fields of the first NInstr entries of Nodes. | ||
// | ||
// The seeds (0, 0, -1) take part in the reduction, so they also act as | ||
// bounds on the result: MinEarliest <= 0, MaxEarliest >= 0, MinLatest <= -1. | ||
// | ||
// NOTE(review): because MinEarliest is seeded with 0 and reduced with | ||
// std::min, it can never end up positive. A caller that tests | ||
// `MinEarliest == 0` to detect "no node can be scheduled in cycle 0" will | ||
// only see a different value if some Earliest is negative. Confirm whether | ||
// the seed should instead be the largest int value. | ||
void ScheduleInfo::compute() { | ||
MinEarliest = 0; | ||
MaxEarliest = 0; | ||
MinLatest = -1; | ||
for (int K = 0; K < NInstr; K++) { | ||
MinEarliest = std::min(MinEarliest, Nodes[K].Earliest); | ||
MaxEarliest = std::max(MaxEarliest, Nodes[K].Earliest); | ||
MinLatest = std::min(MinLatest, Nodes[K].Latest); | ||
} | ||
} | ||
|
||
} // namespace llvm::AIE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rationale: It is difficult to give a useful absolute value. For a small ResMII we would keep trying well beyond the point of usefulness; for a larger ResMII it may not be enough. A relative amount works better.