llvm.org GIT mirror llvm / a99d6d2
Resubmit rL345008 "Split MachinePipeliner code into header and cpp files" The commit caused unclear failures in http://green.lab.llvm.org/green//job/lldb-cmake/ will revert if the error reappears Differential Revision: https://reviews.llvm.org/D56084 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350290 91177308-0d34-0410-b5e6-96231b3b80d8 Lama Saba 8 months ago
2 changed file(s) with 619 addition(s) and 595 deletion(s). Raw diff Collapse all Expand all
0 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // Software pipelining (SWP) is an instruction scheduling technique for loops
12 // that overlaps loop iterations and exploits ILP via a compiler transformation.
13 //
14 // Swing Modulo Scheduling is an implementation of software pipelining
15 // that generates schedules that are near optimal in terms of initiation
16 // interval, register requirements, and stage count. See the papers:
17 //
18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20 // Conference on Parallel Architectures and Compilation Techniques.
21 //
22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24 // Transactions on Computers, Vol. 50, No. 3, 2001.
25 //
26 // "An Implementation of Swing Modulo Scheduling With Extensions for
27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28 // Urbana-Champaign, 2005.
29 //
30 //
31 // The SMS algorithm consists of three main steps after computing the minimal
32 // initiation interval (MII).
33 // 1) Analyze the dependence graph and compute information about each
34 // instruction in the graph.
35 // 2) Order the nodes (instructions) by priority based upon the heuristics
36 // described in the algorithm.
37 // 3) Attempt to schedule the nodes in the specified order using the MII.
38 //
39 //===----------------------------------------------------------------------===//
40 #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
41 #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
42
43 #include "llvm/CodeGen/RegisterClassInfo.h"
44 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
45 #include "llvm/CodeGen/TargetInstrInfo.h"
46
47 namespace llvm {
48
49 class NodeSet;
50 class SMSchedule;
51
52 extern cl::opt SwpEnableCopyToPhi;
53
54 /// The main class in the implementation of the target independent
55 /// software pipeliner pass.
///
/// The data members below are public; in this listing SwingSchedulerDAG's
/// constructor reads MF and MLI through a MachinePipeliner reference.
///
/// NOTE(review): several templates in this listing appear without their
/// template arguments (e.g. `SmallVector BrCond`, `AU.addRequired()`). That
/// looks like a text-extraction artifact -- restore them from the upstream
/// file before compiling.
56 class MachinePipeliner : public MachineFunctionPass {
57 public:
58 MachineFunction *MF = nullptr;
59 const MachineLoopInfo *MLI = nullptr;
60 const MachineDominatorTree *MDT = nullptr;
// NOTE(review): unlike the sibling pointers, InstrItins has no in-class
// initializer, so its value is indeterminate until it is assigned. Confirm it
// is always set before use, or give it `= nullptr` like the others.
61 const InstrItineraryData *InstrItins;
62 const TargetInstrInfo *TII = nullptr;
63 RegisterClassInfo RegClassInfo;
64
// NumTries is only declared when NDEBUG is not defined.
65 #ifndef NDEBUG
66 static int NumTries;
67 #endif
68
69 /// Cache the target analysis information about the loop.
70 struct LoopInfo {
71 MachineBasicBlock *TBB = nullptr;
72 MachineBasicBlock *FBB = nullptr;
73 SmallVector BrCond;
74 MachineInstr *LoopInductionVar = nullptr;
75 MachineInstr *LoopCompare = nullptr;
76 };
77 LoopInfo LI;
78
/// Pass identifier; passed to the MachineFunctionPass constructor below.
79 static char ID;
80
/// Construct the pass and call initializeMachinePipelinerPass() on the
/// registry returned by PassRegistry::getPassRegistry().
81 MachinePipeliner() : MachineFunctionPass(ID) {
82 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
83 }
84
/// Pass entry point (overrides the base class); defined out of line.
85 bool runOnMachineFunction(MachineFunction &MF) override;
86
/// Record the analyses this pass requires and preserves on \p AU, then
/// forward to MachineFunctionPass::getAnalysisUsage().
87 void getAnalysisUsage(AnalysisUsage &AU) const override {
88 AU.addRequired();
89 AU.addPreserved();
90 AU.addRequired();
91 AU.addRequired();
92 AU.addRequired();
93 MachineFunctionPass::getAnalysisUsage(AU);
94 }
95
96 private:
// Helpers declared here and defined out of line; their behaviour is not
// visible in this header.
97 void preprocessPhiNodes(MachineBasicBlock &B);
98 bool canPipelineLoop(MachineLoop &L);
99 bool scheduleLoop(MachineLoop &L);
100 bool swingModuloScheduler(MachineLoop &L);
101 };
102
103 /// This class builds the dependence graph for the instructions in a loop,
104 /// and attempts to schedule the instructions using the SMS algorithm.
///
/// NOTE(review): many templates in this listing appear without their template
/// arguments (e.g. `std::vector ScheduleInfo`, `DenseMap> InstrChanges`,
/// `llvm::make_unique()`). That looks like a text-extraction artifact --
/// restore them from the upstream file before compiling.
105 class SwingSchedulerDAG : public ScheduleDAGInstrs {
106 MachinePipeliner &Pass;
107 /// The minimum initiation interval between iterations for this schedule.
108 unsigned MII = 0;
109 /// Set to true if a valid pipelined schedule is found for the loop.
110 bool Scheduled = false;
111 MachineLoop &Loop;
112 LiveIntervals &LIS;
113 const RegisterClassInfo &RegClassInfo;
114
115 /// A topological ordering of the SUnits, which is needed for changing
116 /// dependences and iterating over the SUnits.
117 ScheduleDAGTopologicalSort Topo;
118
/// Per-node scheduling properties; every field defaults to 0.
119 struct NodeInfo {
120 int ASAP = 0;
121 int ALAP = 0;
122 int ZeroLatencyDepth = 0;
123 int ZeroLatencyHeight = 0;
124
125 NodeInfo() = default;
126 };
127 /// Computed properties for each node in the graph.
/// Indexed by SUnit::NodeNum (see getASAP() and friends below).
128 std::vector ScheduleInfo;
129
130 enum OrderKind { BottomUp = 0, TopDown = 1 };
131 /// Computed node ordering for scheduling.
132 SetVector NodeOrder;
133
134 using NodeSetType = SmallVector;
135 using ValueMapTy = DenseMap;
136 using MBBVectorTy = SmallVectorImpl;
137 using InstrMapTy = DenseMap;
138
139 /// Instructions to change when emitting the final schedule.
/// Looked up by SUnit in getInstrBaseReg(); the mapped value is a pair.
140 DenseMap> InstrChanges;
141
142 /// We may create a new instruction, so remember it because it
143 /// must be deleted when the pass is finished.
144 SmallPtrSet NewMIs;
145
146 /// Ordered list of DAG postprocessing steps.
147 std::vector> Mutations;
148
149 /// Helper class to implement Johnson's circuit finding algorithm.
///
/// NOTE(review): Node2Idx is an owning raw pointer (allocated with `new` in
/// the constructor, released in the destructor), but copy construction and
/// copy assignment are not deleted, so copying a Circuits would delete the
/// same vector twice. NumPaths is not set by the constructor; it is only
/// assigned in reset().
150 class Circuits {
151 std::vector &SUnits;
152 SetVector Stack;
153 BitVector Blocked;
154 SmallVector, 10> B;
155 SmallVector, 16> AdjK;
156 // Node to Index from ScheduleDAGTopologicalSort
157 std::vector *Node2Idx;
158 unsigned NumPaths;
159 static unsigned MaxPaths;
160
161 public:
/// Size Blocked, B and AdjK to the number of SUnits, then fill Node2Idx by
/// walking \p Topo: the node number found at position Idx maps to Idx.
162 Circuits(std::vector &SUs, ScheduleDAGTopologicalSort &Topo)
163 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
164 Node2Idx = new std::vector(SUs.size());
165 unsigned Idx = 0;
166 for (const auto &NodeNum : Topo)
167 Node2Idx->at(NodeNum) = Idx++;
168 }
169
170 ~Circuits() { delete Node2Idx; }
171
172 /// Reset the data structures used in the circuit algorithm.
/// Clears Stack and Blocked, re-assigns every entry of B to an empty set and
/// zeroes NumPaths. AdjK and Node2Idx are left untouched.
173 void reset() {
174 Stack.clear();
175 Blocked.reset();
176 B.assign(SUnits.size(), SmallPtrSet());
177 NumPaths = 0;
178 }
179
180 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
181 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
182 void unblock(int U);
183 };
184
/// DAG mutation whose apply() is defined out of line.
185 struct CopyToPhiMutation : public ScheduleDAGMutation {
186 void apply(ScheduleDAGInstrs *DAG) override;
187 };
188
189 public:
/// Build the DAG wrapper for loop \p L.
///
/// The ScheduleDAGInstrs base receives *P.MF, P.MLI and `false`; Topo is
/// built over SUnits and &ExitSU. The body asks the subtarget for its SMS
/// mutations via getSMSMutations(Mutations) and, when SwpEnableCopyToPhi is
/// set, appends one more mutation (the make_unique template argument is
/// missing from this listing; presumably CopyToPhiMutation -- confirm).
190 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
191 const RegisterClassInfo &rci)
192 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
193 RegClassInfo(rci), Topo(SUnits, &ExitSU) {
194 P.MF->getSubtarget().getSMSMutations(Mutations);
195 if (SwpEnableCopyToPhi)
196 Mutations.push_back(llvm::make_unique());
197 }
198
199 void schedule() override;
200 void finishBlock() override;
201
202 /// Return true if the loop kernel has been scheduled.
203 bool hasNewSchedule() { return Scheduled; }
204
205 /// Return the earliest time an instruction may be scheduled.
206 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
207
208 /// Return the latest time an instruction may be scheduled.
209 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
210
211 /// The mobility function, which is the number of slots in which
212 /// an instruction may be scheduled. Computed as ALAP - ASAP.
213 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
214
215 /// The depth, in the dependence graph, for a node.
216 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
217
218 /// The maximum unweighted length of a path from an arbitrary node to the
219 /// given node in which each edge has latency 0
220 int getZeroLatencyDepth(SUnit *Node) {
221 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
222 }
223
224 /// The height, in the dependence graph, for a node.
225 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
226
227 /// The maximum unweighted length of a path from the given node to an
228 /// arbitrary node in which each edge has latency 0
229 int getZeroLatencyHeight(SUnit *Node) {
230 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
231 }
232
233 /// Return true if the dependence is a back-edge in the data dependence graph.
234 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
235 /// using an anti dependence from a Phi to an instruction.
236 bool isBackedge(SUnit *Source, const SDep &Dep) {
// Only anti dependences can be back-edges.
237 if (Dep.getKind() != SDep::Anti)
238 return false;
// Either endpoint of the dependence being a Phi qualifies.
239 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
240 }
241
242 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
243
244 /// The distance function, which indicates that operation V of iteration I
245 /// depends on operations U of iteration I-distance.
/// Returns 1 for an anti dependence whose V is a Phi, otherwise 0. \p U is
/// not consulted by this implementation.
246 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
247 // Instructions that feed a Phi have a distance of 1. Computing larger
248 // values for arrays requires data dependence information.
249 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
250 return 1;
251 return 0;
252 }
253
254 /// Set the Minimum Initiation Interval for this schedule attempt.
255 void setMII(unsigned mii) { MII = mii; }
256
257 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
258
259 void fixupRegisterOverlaps(std::deque &Instrs);
260
261 /// Return the new base register that was stored away for the changed
262 /// instruction.
/// Returns 0 when \p SU has no entry in InstrChanges.
263 unsigned getInstrBaseReg(SUnit *SU) {
264 DenseMap>::iterator It =
265 InstrChanges.find(SU);
266 if (It != InstrChanges.end())
267 return It->second.first;
268 return 0;
269 }
270
/// Append \p Mutation to the ordered list of DAG postprocessing steps,
/// taking ownership of it.
271 void addMutation(std::unique_ptr Mutation) {
272 Mutations.push_back(std::move(Mutation));
273 }
274
// NOTE(review): returns true unconditionally, regardless of \p DAG.
// Presumably this is the hook used by isa/cast-style checks, in which case
// every ScheduleDAGInstrs would be accepted as a SwingSchedulerDAG -- confirm
// that is intended.
275 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
276
277 private:
// The remaining members are declared here and defined out of line; their
// behaviour is not visible in this header.
278 void addLoopCarriedDependences(AliasAnalysis *AA);
279 void updatePhiDependences();
280 void changeDependences();
281 unsigned calculateResMII();
282 unsigned calculateRecMII(NodeSetType &RecNodeSets);
283 void findCircuits(NodeSetType &NodeSets);
284 void fuseRecs(NodeSetType &NodeSets);
285 void removeDuplicateNodes(NodeSetType &NodeSets);
286 void computeNodeFunctions(NodeSetType &NodeSets);
287 void registerPressureFilter(NodeSetType &NodeSets);
288 void colocateNodeSets(NodeSetType &NodeSets);
289 void checkNodeSets(NodeSetType &NodeSets);
290 void groupRemainingNodes(NodeSetType &NodeSets);
291 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
292 SetVector &NodesAdded);
293 void computeNodeOrder(NodeSetType &NodeSets);
294 void checkValidNodeOrder(const NodeSetType &Circuits) const;
295 bool schedulePipeline(SMSchedule &Schedule);
296 void generatePipelinedLoop(SMSchedule &Schedule);
297 void generateProlog(SMSchedule &Schedule, unsigned LastStage,
298 MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
299 MBBVectorTy &PrologBBs);
300 void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
301 MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
302 MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
303 void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
304 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
305 SMSchedule &Schedule, ValueMapTy *VRMap,
306 InstrMapTy &InstrMap, unsigned LastStageNum,
307 unsigned CurStageNum, bool IsLast);
308 void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
309 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
310 SMSchedule &Schedule, ValueMapTy *VRMap,
311 InstrMapTy &InstrMap, unsigned LastStageNum,
312 unsigned CurStageNum, bool IsLast);
313 void removeDeadInstructions(MachineBasicBlock *KernelBB,
314 MBBVectorTy &EpilogBBs);
315 void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
316 SMSchedule &Schedule);
317 void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
318 MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
319 ValueMapTy *VRMap);
320 bool computeDelta(MachineInstr &MI, unsigned &Delta);
321 void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
322 unsigned Num);
323 MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
324 unsigned InstStageNum);
325 MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
326 unsigned InstStageNum,
327 SMSchedule &Schedule);
328 void updateInstruction(MachineInstr *NewMI, bool LastDef,
329 unsigned CurStageNum, unsigned InstrStageNum,
330 SMSchedule &Schedule, ValueMapTy *VRMap);
331 MachineInstr *findDefInLoop(unsigned Reg);
332 unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
333 unsigned LoopStage, ValueMapTy *VRMap,
334 MachineBasicBlock *BB);
335 void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
336 SMSchedule &Schedule, ValueMapTy *VRMap,
337 InstrMapTy &InstrMap);
338 void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
339 InstrMapTy &InstrMap, unsigned CurStageNum,
340 unsigned PhiNum, MachineInstr *Phi,
341 unsigned OldReg, unsigned NewReg,
342 unsigned PrevReg = 0);
343 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
344 unsigned &OffsetPos, unsigned &NewBase,
345 int64_t &NewOffset);
346 void postprocessDAG();
347 };
348
349 /// A NodeSet contains a set of SUnit DAG nodes with additional information
350 /// that assigns a priority to the set.
///
/// NOTE(review): `SetVector` appears without template arguments throughout
/// this listing; that looks like a text-extraction artifact -- restore them
/// from the upstream file before compiling.
351 class NodeSet {
352 SetVector Nodes;
353 bool HasRecurrence = false;
354 unsigned RecMII = 0;
355 int MaxMOV = 0;
356 unsigned MaxDepth = 0;
357 unsigned Colocate = 0;
358 SUnit *ExceedPressure = nullptr;
359 unsigned Latency = 0;
360
361 public:
362 using iterator = SetVector::const_iterator;
363
364 NodeSet() = default;
/// Build a node set from the range [S, E) and mark it as a recurrence.
///
/// Latency becomes the sum of the latencies of every successor edge whose
/// target node is also a member of the set.
365 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
366 Latency = 0;
367 for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
368 for (const SDep &Succ : Nodes[i]->Succs)
369 if (Nodes.count(Succ.getSUnit()))
370 Latency += Succ.getLatency();
371 }
372
/// Insert \p SU; forwards the result of the underlying container's insert().
373 bool insert(SUnit *SU) { return Nodes.insert(SU); }
374
375 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
376
/// Forward predicate \p P to the underlying container's remove_if().
377 template bool remove_if(UnaryPredicate P) {
378 return Nodes.remove_if(P);
379 }
380
381 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
382
383 bool hasRecurrence() { return HasRecurrence; };
384
385 unsigned size() const { return Nodes.size(); }
386
387 bool empty() const { return Nodes.empty(); }
388
389 SUnit *getNode(unsigned i) const { return Nodes[i]; };
390
391 void setRecMII(unsigned mii) { RecMII = mii; };
392
393 void setColocate(unsigned c) { Colocate = c; };
394
395 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
396
397 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
398
// NOTE(review): RecMII is unsigned, so the subtraction is done in unsigned
// arithmetic and then converted to int; confirm values stay small enough for
// the sign of the result to be meaningful.
399 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
400
// NOTE(review): returns the unsigned RecMII member as int.
401 int getRecMII() { return RecMII; }
402
403 /// Summarize node functions for the entire node set.
/// Raises MaxMOV and MaxDepth to the largest getMOV()/getDepth() value over
/// the member nodes. The existing values are the starting point; they are
/// not reset first.
404 void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
405 for (SUnit *SU : *this) {
406 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
407 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
408 }
409 }
410
411 unsigned getLatency() { return Latency; }
412
413 unsigned getMaxDepth() { return MaxDepth; }
414
/// Empty the set and restore the summary fields to their initial values.
// NOTE(review): Latency is not reset here, unlike every other field --
// confirm whether a cleared set is expected to keep its old latency.
415 void clear() {
416 Nodes.clear();
417 RecMII = 0;
418 HasRecurrence = false;
419 MaxMOV = 0;
420 MaxDepth = 0;
421 Colocate = 0;
422 ExceedPressure = nullptr;
423 }
424
/// Implicit conversion exposing the underlying node container by mutable
/// reference.
425 operator SetVector &() { return Nodes; }
426
427 /// Sort the node sets by importance. First, rank them by recurrence MII,
428 /// then by mobility (least mobile done first), and finally by depth.
429 /// Each node set may contain a colocate value which is used as the first
430 /// tie breaker, if it's set.
431 bool operator>(const NodeSet &RHS) const {
432 if (RecMII == RHS.RecMII) {
// The colocate tie breaker only applies when both values are non-zero and
// differ; the smaller colocate value ranks higher.
433 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
434 return Colocate < RHS.Colocate;
435 if (MaxMOV == RHS.MaxMOV)
436 return MaxDepth > RHS.MaxDepth;
437 return MaxMOV < RHS.MaxMOV;
438 }
439 return RecMII > RHS.RecMII;
440 }
441
/// Equality on the ranking keys only (RecMII, MaxMOV, MaxDepth); the member
/// nodes and the colocate value are not compared.
442 bool operator==(const NodeSet &RHS) const {
443 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
444 MaxDepth == RHS.MaxDepth;
445 }
446
447 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
448
449 iterator begin() { return Nodes.begin(); }
450 iterator end() { return Nodes.end(); }
451
/// Write a one-line summary (node count, rec, mov, depth, col) followed by
/// one "SU(n) <instr>" entry per member node to \p os.
452 void print(raw_ostream &os) const {
453 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
454 << " depth " << MaxDepth << " col " << Colocate << "\n";
455 for (const auto &I : Nodes)
456 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
457 os << "\n";
458 }
459
// dump() is only available when NDEBUG is undefined or LLVM_ENABLE_DUMP is
// defined; it prints to dbgs().
460 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
461 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
462 #endif
463 };
464
465 /// This class represents the scheduled code. The main data structure is a
466 /// map from scheduled cycle to instructions. During scheduling, the
467 /// data structure explicitly represents all stages/iterations. When
468 /// the algorithm finishes, the schedule is collapsed into a single stage,
469 /// which represents instructions from different loop iterations.
470 ///
471 /// The SMS algorithm allows negative values for cycles, so the first cycle
472 /// in the schedule is the smallest cycle value.
///
/// NOTE(review): several templates in this listing appear without their
/// template arguments (e.g. `std::map InstrToCycle`, `std::pair Stages`).
/// That looks like a text-extraction artifact -- restore them from the
/// upstream file before compiling.
473 class SMSchedule {
474 private:
475 /// Map from execution cycle to instructions.
476 DenseMap> ScheduledInstrs;
477
478 /// Map from instruction to execution cycle.
479 std::map InstrToCycle;
480
481 /// Map for each register and the max difference between its uses and def.
482 /// The first element in the pair is the max difference in stages. The
483 /// second is true if the register defines a Phi value and loop value is
484 /// scheduled before the Phi.
485 std::map> RegToStageDiff;
486
487 /// Keep track of the first cycle value in the schedule. It starts
488 /// as zero, but the algorithm allows negative values.
489 int FirstCycle = 0;
490
491 /// Keep track of the last cycle value in the schedule.
492 int LastCycle = 0;
493
494 /// The initiation interval (II) for the schedule.
/// Starts at 0 and is set through setInitiationInterval(); several accessors
/// below divide by it.
495 int InitiationInterval = 0;
496
497 /// Target machine information.
498 const TargetSubtargetInfo &ST;
499
500 /// Virtual register information.
501 MachineRegisterInfo &MRI;
502
503 std::unique_ptr Resources;
504
505 public:
/// Bind to the subtarget and register info of \p mf, and take ownership of
/// the object returned by the target's CreateTargetScheduleState().
506 SMSchedule(MachineFunction *mf)
507 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
508 Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
509
/// Clear the three maps and set FirstCycle, LastCycle and InitiationInterval
/// back to 0. ST, MRI and Resources are left as they are.
510 void reset() {
511 ScheduledInstrs.clear();
512 InstrToCycle.clear();
513 RegToStageDiff.clear();
514 FirstCycle = 0;
515 LastCycle = 0;
516 InitiationInterval = 0;
517 }
518
519 /// Set the initiation interval for this schedule.
520 void setInitiationInterval(int ii) { InitiationInterval = ii; }
521
522 /// Return the first cycle in the completed schedule. This
523 /// can be a negative value.
524 int getFirstCycle() const { return FirstCycle; }
525
526 /// Return the last cycle in the finalized schedule.
/// Computed as FirstCycle + InitiationInterval - 1.
527 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
528
529 /// Return the cycle of the earliest scheduled instruction in the dependence
530 /// chain.
531 int earliestCycleInChain(const SDep &Dep);
532
533 /// Return the cycle of the latest scheduled instruction in the dependence
534 /// chain.
535 int latestCycleInChain(const SDep &Dep);
536
537 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
538 int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
539 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
540
541 /// Iterators for the cycle to instruction map.
542 using sched_iterator = DenseMap>::iterator;
543 using const_sched_iterator =
544 DenseMap>::const_iterator;
545
546 /// Return true if the instruction is scheduled at the specified stage.
547 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
548 return (stageScheduled(SU) == (int)StageNum);
549 }
550
551 /// Return the stage for a scheduled instruction. Return -1 if
552 /// the instruction has not been scheduled.
// NOTE(review): divides by InitiationInterval, which is 0 after construction
// or reset() until setInitiationInterval() is called with a non-zero value.
553 int stageScheduled(SUnit *SU) const {
554 std::map::const_iterator it = InstrToCycle.find(SU);
555 if (it == InstrToCycle.end())
556 return -1;
557 return (it->second - FirstCycle) / InitiationInterval;
558 }
559
560 /// Return the cycle for a scheduled instruction. This function normalizes
561 /// the first cycle to be 0.
// NOTE(review): the lookup is only guarded by an assert, and the modulo has
// the same zero-InitiationInterval hazard as stageScheduled().
562 unsigned cycleScheduled(SUnit *SU) const {
563 std::map::const_iterator it = InstrToCycle.find(SU);
564 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
565 return (it->second - FirstCycle) % InitiationInterval;
566 }
567
568 /// Return the maximum stage count needed for this schedule.
/// Computed as (LastCycle - FirstCycle) / InitiationInterval.
569 unsigned getMaxStageCount() {
570 return (LastCycle - FirstCycle) / InitiationInterval;
571 }
572
573 /// Return the max. number of stages/iterations that can occur between a
574 /// register definition and its uses.
575 unsigned getStagesForReg(int Reg, unsigned CurStage) {
// operator[] on std::map inserts a default entry when Reg is absent.
576 std::pair Stages = RegToStageDiff[Reg];
// Past the last stage, a zero stage difference is reported as 1 when the
// pair's flag is set.
577 if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
578 return 1;
579 return Stages.first;
580 }
581
582 /// The number of stages for a Phi is a little different than other
583 /// instructions. The minimum value computed in RegToStageDiff is 1
584 /// because we assume the Phi is needed for at least 1 iteration.
585 /// This is not the case if the loop value is scheduled prior to the
586 /// Phi in the same stage. This function returns the number of stages
587 /// or iterations needed between the Phi definition and any uses.
588 unsigned getStagesForPhi(int Reg) {
// operator[] on std::map inserts a default entry when Reg is absent.
589 std::pair Stages = RegToStageDiff[Reg];
590 if (Stages.second)
591 return Stages.first;
592 return Stages.first - 1;
593 }
594
595 /// Return the instructions that are scheduled at the specified cycle.
/// Uses operator[] on the cycle map and returns a mutable reference;
/// presumably an empty entry is created when \p cycle is absent -- confirm.
596 std::deque &getInstructions(int cycle) {
597 return ScheduledInstrs[cycle];
598 }
599
// The remaining members are declared here and defined out of line; their
// behaviour is not visible in this header.
600 bool isValidSchedule(SwingSchedulerDAG *SSD);
601 void finalizeSchedule(SwingSchedulerDAG *SSD);
602 void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
603 std::deque &Insts);
604 bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
605 bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
606 MachineOperand &MO);
607 void print(raw_ostream &os) const;
608 void dump() const;
609 };
610
611 } // end namespace llvm
612
613 #endif // LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
77 //===----------------------------------------------------------------------===//
88 //
99 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // Software pipelining (SWP) is an instruction scheduling technique for loops
12 // that overlaps loop iterations and exploits ILP via a compiler transformation.
13 //
14 // Swing Modulo Scheduling is an implementation of software pipelining
15 // that generates schedules that are near optimal in terms of initiation
16 // interval, register requirements, and stage count. See the papers:
17 //
18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20 // Conference on Parallel Architectures and Compilation Techniques.
21 //
22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24 // Transactions on Computers, Vol. 50, No. 3, 2001.
25 //
26 // "An Implementation of Swing Modulo Scheduling With Extensions for
27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28 // Urbana-Champaign, 2005.
29 //
30 //
31 // The SMS algorithm consists of three main steps after computing the minimal
32 // initiation interval (MII).
33 // 1) Analyze the dependence graph and compute information about each
34 // instruction in the graph.
35 // 2) Order the nodes (instructions) by priority based upon the heuristics
36 // described in the algorithm.
37 // 3) Attempt to schedule the nodes in the specified order using the MII.
3810 //
3911 // This SMS implementation is a target-independent back-end pass. When enabled,
4012 // the pass runs just prior to the register allocation pass, while the machine
8254 #include "llvm/CodeGen/MachineLoopInfo.h"
8355 #include "llvm/CodeGen/MachineMemOperand.h"
8456 #include "llvm/CodeGen/MachineOperand.h"
57 #include "llvm/CodeGen/MachinePipeliner.h"
8558 #include "llvm/CodeGen/MachineRegisterInfo.h"
86 #include "llvm/CodeGen/RegisterClassInfo.h"
8759 #include "llvm/CodeGen/RegisterPressure.h"
8860 #include "llvm/CodeGen/ScheduleDAG.h"
89 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
9061 #include "llvm/CodeGen/ScheduleDAGMutation.h"
91 #include "llvm/CodeGen/TargetInstrInfo.h"
9262 #include "llvm/CodeGen/TargetOpcodes.h"
9363 #include "llvm/CodeGen/TargetRegisterInfo.h"
9464 #include "llvm/CodeGen/TargetSubtargetInfo.h"
170140 cl::ReallyHidden, cl::init(false),
171141 cl::ZeroOrMore, cl::desc("Ignore RecMII"));
172142
143 namespace llvm {
144
173145 // A command line option to enable the CopyToPhi DAG mutation.
174 static cl::opt
146 cl::opt
175147 SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
176148 cl::init(true), cl::ZeroOrMore,
177149 cl::desc("Enable CopyToPhi DAG Mutation"));
178150
179 namespace {
180
181 class NodeSet;
182 class SMSchedule;
183
184 /// The main class in the implementation of the target independent
185 /// software pipeliner pass.
///
/// The data members below are public; in this listing SwingSchedulerDAG's
/// constructor reads MF and MLI through a MachinePipeliner reference.
///
/// NOTE(review): several templates in this listing appear without their
/// template arguments (e.g. `SmallVector BrCond`, `AU.addRequired()`). That
/// looks like a text-extraction artifact -- restore them from the upstream
/// file before compiling.
186 class MachinePipeliner : public MachineFunctionPass {
187 public:
188 MachineFunction *MF = nullptr;
189 const MachineLoopInfo *MLI = nullptr;
190 const MachineDominatorTree *MDT = nullptr;
// NOTE(review): unlike the sibling pointers, InstrItins has no in-class
// initializer, so its value is indeterminate until it is assigned. Confirm it
// is always set before use, or give it `= nullptr` like the others.
191 const InstrItineraryData *InstrItins;
192 const TargetInstrInfo *TII = nullptr;
193 RegisterClassInfo RegClassInfo;
194
// NumTries is only declared when NDEBUG is not defined.
195 #ifndef NDEBUG
196 static int NumTries;
197 #endif
198
199 /// Cache the target analysis information about the loop.
200 struct LoopInfo {
201 MachineBasicBlock *TBB = nullptr;
202 MachineBasicBlock *FBB = nullptr;
203 SmallVector BrCond;
204 MachineInstr *LoopInductionVar = nullptr;
205 MachineInstr *LoopCompare = nullptr;
206 };
207 LoopInfo LI;
208
/// Pass identifier; passed to the MachineFunctionPass constructor below.
209 static char ID;
210
/// Construct the pass and call initializeMachinePipelinerPass() on the
/// registry returned by PassRegistry::getPassRegistry().
211 MachinePipeliner() : MachineFunctionPass(ID) {
212 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
213 }
214
/// Pass entry point (overrides the base class); defined out of line.
215 bool runOnMachineFunction(MachineFunction &MF) override;
216
/// Record the analyses this pass requires and preserves on \p AU, then
/// forward to MachineFunctionPass::getAnalysisUsage().
217 void getAnalysisUsage(AnalysisUsage &AU) const override {
218 AU.addRequired();
219 AU.addPreserved();
220 AU.addRequired();
221 AU.addRequired();
222 AU.addRequired();
223 MachineFunctionPass::getAnalysisUsage(AU);
224 }
225
226 private:
// Helpers declared here and defined out of line; their behaviour is not
// visible in this listing.
227 void preprocessPhiNodes(MachineBasicBlock &B);
228 bool canPipelineLoop(MachineLoop &L);
229 bool scheduleLoop(MachineLoop &L);
230 bool swingModuloScheduler(MachineLoop &L);
231 };
232
233 /// This class builds the dependence graph for the instructions in a loop,
234 /// and attempts to schedule the instructions using the SMS algorithm.
235 class SwingSchedulerDAG : public ScheduleDAGInstrs {
236 MachinePipeliner &Pass;
237 /// The minimum initiation interval between iterations for this schedule.
238 unsigned MII = 0;
239 /// Set to true if a valid pipelined schedule is found for the loop.
240 bool Scheduled = false;
241 MachineLoop &Loop;
242 LiveIntervals &LIS;
243 const RegisterClassInfo &RegClassInfo;
244
245 /// A toplogical ordering of the SUnits, which is needed for changing
246 /// dependences and iterating over the SUnits.
247 ScheduleDAGTopologicalSort Topo;
248
249 struct NodeInfo {
250 int ASAP = 0;
251 int ALAP = 0;
252 int ZeroLatencyDepth = 0;
253 int ZeroLatencyHeight = 0;
254
255 NodeInfo() = default;
256 };
257 /// Computed properties for each node in the graph.
258 std::vector ScheduleInfo;
259
260 enum OrderKind { BottomUp = 0, TopDown = 1 };
261 /// Computed node ordering for scheduling.
262 SetVector NodeOrder;
263
264 using NodeSetType = SmallVector;
265 using ValueMapTy = DenseMap;
266 using MBBVectorTy = SmallVectorImpl;
267 using InstrMapTy = DenseMap;
268
269 /// Instructions to change when emitting the final schedule.
270 DenseMap> InstrChanges;
271
272 /// We may create a new instruction, so remember it because it
273 /// must be deleted when the pass is finished.
274 SmallPtrSet NewMIs;
275
276 /// Ordered list of DAG postprocessing steps.
277 std::vector> Mutations;
278
279 /// Helper class to implement Johnson's circuit finding algorithm.
280 class Circuits {
281 std::vector &SUnits;
282 SetVector Stack;
283 BitVector Blocked;
284 SmallVector, 10> B;
285 SmallVector, 16> AdjK;
286 // Node to Index from ScheduleDAGTopologicalSort
287 std::vector *Node2Idx;
288 unsigned NumPaths;
289 static unsigned MaxPaths;
290
291 public:
292 Circuits(std::vector &SUs, ScheduleDAGTopologicalSort &Topo)
293 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
294 Node2Idx = new std::vector(SUs.size());
295 unsigned Idx = 0;
296 for (const auto &NodeNum : Topo)
297 Node2Idx->at(NodeNum) = Idx++;
298 }
299
300 ~Circuits() { delete Node2Idx; }
301
302 /// Reset the data structures used in the circuit algorithm.
303 void reset() {
304 Stack.clear();
305 Blocked.reset();
306 B.assign(SUnits.size(), SmallPtrSet());
307 NumPaths = 0;
308 }
309
310 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
311 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
312 void unblock(int U);
313 };
314
315 struct CopyToPhiMutation : public ScheduleDAGMutation {
316 void apply(ScheduleDAGInstrs *DAG) override;
317 };
318
319 public:
320 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
321 const RegisterClassInfo &rci)
322 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
323 RegClassInfo(rci), Topo(SUnits, &ExitSU) {
324 P.MF->getSubtarget().getSMSMutations(Mutations);
325 if (SwpEnableCopyToPhi)
326 Mutations.push_back(llvm::make_unique());
327 }
328
329 void schedule() override;
330 void finishBlock() override;
331
332 /// Return true if the loop kernel has been scheduled.
333 bool hasNewSchedule() { return Scheduled; }
334
335 /// Return the earliest time an instruction may be scheduled.
336 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
337
338 /// Return the latest time an instruction my be scheduled.
339 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
340
341 /// The mobility function, which the number of slots in which
342 /// an instruction may be scheduled.
343 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
344
345 /// The depth, in the dependence graph, for a node.
346 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
347
348 /// The maximum unweighted length of a path from an arbitrary node to the
349 /// given node in which each edge has latency 0
350 int getZeroLatencyDepth(SUnit *Node) {
351 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
352 }
353
354 /// The height, in the dependence graph, for a node.
355 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
356
357 /// The maximum unweighted length of a path from the given node to an
358 /// arbitrary node in which each edge has latency 0
359 int getZeroLatencyHeight(SUnit *Node) {
360 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
361 }
362
363 /// Return true if the dependence is a back-edge in the data dependence graph.
364 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
365 /// using an anti dependence from a Phi to an instruction.
366 bool isBackedge(SUnit *Source, const SDep &Dep) {
367 if (Dep.getKind() != SDep::Anti)
368 return false;
369 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
370 }
371
372 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
373
374 /// The distance function, which indicates that operation V of iteration I
375 /// depends on operations U of iteration I-distance.
376 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
377 // Instructions that feed a Phi have a distance of 1. Computing larger
378 // values for arrays requires data dependence information.
379 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
380 return 1;
381 return 0;
382 }
383
384 /// Set the Minimum Initiation Interval for this schedule attempt.
385 void setMII(unsigned mii) { MII = mii; }
386
387 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
388
389 void fixupRegisterOverlaps(std::deque &Instrs);
390
391 /// Return the new base register that was stored away for the changed
392 /// instruction.
393 unsigned getInstrBaseReg(SUnit *SU) {
394 DenseMap>::iterator It =
395 InstrChanges.find(SU);
396 if (It != InstrChanges.end())
397 return It->second.first;
398 return 0;
399 }
400
401 void addMutation(std::unique_ptr Mutation) {
402 Mutations.push_back(std::move(Mutation));
403 }
404
405 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
406
407 private:
408 void addLoopCarriedDependences(AliasAnalysis *AA);
409 void updatePhiDependences();
410 void changeDependences();
411 unsigned calculateResMII();
412 unsigned calculateRecMII(NodeSetType &RecNodeSets);
413 void findCircuits(NodeSetType &NodeSets);
414 void fuseRecs(NodeSetType &NodeSets);
415 void removeDuplicateNodes(NodeSetType &NodeSets);
416 void computeNodeFunctions(NodeSetType &NodeSets);
417 void registerPressureFilter(NodeSetType &NodeSets);
418 void colocateNodeSets(NodeSetType &NodeSets);
419 void checkNodeSets(NodeSetType &NodeSets);
420 void groupRemainingNodes(NodeSetType &NodeSets);
421 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
422 SetVector &NodesAdded);
423 void computeNodeOrder(NodeSetType &NodeSets);
424 void checkValidNodeOrder(const NodeSetType &Circuits) const;
425 bool schedulePipeline(SMSchedule &Schedule);
426 void generatePipelinedLoop(SMSchedule &Schedule);
427 void generateProlog(SMSchedule &Schedule, unsigned LastStage,
428 MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
429 MBBVectorTy &PrologBBs);
430 void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
431 MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
432 MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
433 void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
434 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
435 SMSchedule &Schedule, ValueMapTy *VRMap,
436 InstrMapTy &InstrMap, unsigned LastStageNum,
437 unsigned CurStageNum, bool IsLast);
438 void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
439 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
440 SMSchedule &Schedule, ValueMapTy *VRMap,
441 InstrMapTy &InstrMap, unsigned LastStageNum,
442 unsigned CurStageNum, bool IsLast);
443 void removeDeadInstructions(MachineBasicBlock *KernelBB,
444 MBBVectorTy &EpilogBBs);
445 void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
446 SMSchedule &Schedule);
447 void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
448 MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
449 ValueMapTy *VRMap);
450 bool computeDelta(MachineInstr &MI, unsigned &Delta);
451 void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
452 unsigned Num);
453 MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
454 unsigned InstStageNum);
455 MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
456 unsigned InstStageNum,
457 SMSchedule &Schedule);
458 void updateInstruction(MachineInstr *NewMI, bool LastDef,
459 unsigned CurStageNum, unsigned InstrStageNum,
460 SMSchedule &Schedule, ValueMapTy *VRMap);
461 MachineInstr *findDefInLoop(unsigned Reg);
462 unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
463 unsigned LoopStage, ValueMapTy *VRMap,
464 MachineBasicBlock *BB);
465 void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
466 SMSchedule &Schedule, ValueMapTy *VRMap,
467 InstrMapTy &InstrMap);
468 void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
469 InstrMapTy &InstrMap, unsigned CurStageNum,
470 unsigned PhiNum, MachineInstr *Phi,
471 unsigned OldReg, unsigned NewReg,
472 unsigned PrevReg = 0);
473 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
474 unsigned &OffsetPos, unsigned &NewBase,
475 int64_t &NewOffset);
476 void postprocessDAG();
477 };
478
479 /// A NodeSet contains a set of SUnit DAG nodes with additional information
480 /// that assigns a priority to the set.
481 class NodeSet {
482 SetVector Nodes;
483 bool HasRecurrence = false;
484 unsigned RecMII = 0;
485 int MaxMOV = 0;
486 unsigned MaxDepth = 0;
487 unsigned Colocate = 0;
488 SUnit *ExceedPressure = nullptr;
489 unsigned Latency = 0;
490
491 public:
492 using iterator = SetVector::const_iterator;
493
494 NodeSet() = default;
495 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
496 Latency = 0;
497 for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
498 for (const SDep &Succ : Nodes[i]->Succs)
499 if (Nodes.count(Succ.getSUnit()))
500 Latency += Succ.getLatency();
501 }
502
503 bool insert(SUnit *SU) { return Nodes.insert(SU); }
504
505 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
506
507 template bool remove_if(UnaryPredicate P) {
508 return Nodes.remove_if(P);
509 }
510
511 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
512
513 bool hasRecurrence() { return HasRecurrence; };
514
515 unsigned size() const { return Nodes.size(); }
516
517 bool empty() const { return Nodes.empty(); }
518
519 SUnit *getNode(unsigned i) const { return Nodes[i]; };
520
521 void setRecMII(unsigned mii) { RecMII = mii; };
522
523 void setColocate(unsigned c) { Colocate = c; };
524
525 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
526
527 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
528
529 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
530
531 int getRecMII() { return RecMII; }
532
533 /// Summarize node functions for the entire node set.
534 void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
535 for (SUnit *SU : *this) {
536 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
537 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
538 }
539 }
540
541 unsigned getLatency() { return Latency; }
542
543 unsigned getMaxDepth() { return MaxDepth; }
544
545 void clear() {
546 Nodes.clear();
547 RecMII = 0;
548 HasRecurrence = false;
549 MaxMOV = 0;
550 MaxDepth = 0;
551 Colocate = 0;
552 ExceedPressure = nullptr;
553 }
554
555 operator SetVector &() { return Nodes; }
556
557 /// Sort the node sets by importance. First, rank them by recurrence MII,
558 /// then by mobility (least mobile done first), and finally by depth.
559 /// Each node set may contain a colocate value which is used as the first
560 /// tie breaker, if it's set.
561 bool operator>(const NodeSet &RHS) const {
562 if (RecMII == RHS.RecMII) {
563 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
564 return Colocate < RHS.Colocate;
565 if (MaxMOV == RHS.MaxMOV)
566 return MaxDepth > RHS.MaxDepth;
567 return MaxMOV < RHS.MaxMOV;
568 }
569 return RecMII > RHS.RecMII;
570 }
571
572 bool operator==(const NodeSet &RHS) const {
573 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
574 MaxDepth == RHS.MaxDepth;
575 }
576
577 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
578
579 iterator begin() { return Nodes.begin(); }
580 iterator end() { return Nodes.end(); }
581
582 void print(raw_ostream &os) const {
583 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
584 << " depth " << MaxDepth << " col " << Colocate << "\n";
585 for (const auto &I : Nodes)
586 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
587 os << "\n";
588 }
589
590 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
591 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
592 #endif
593 };
594
595 /// This class represents the scheduled code. The main data structure is a
596 /// map from scheduled cycle to instructions. During scheduling, the
597 /// data structure explicitly represents all stages/iterations. When
598 /// the algorithm finshes, the schedule is collapsed into a single stage,
599 /// which represents instructions from different loop iterations.
600 ///
601 /// The SMS algorithm allows negative values for cycles, so the first cycle
602 /// in the schedule is the smallest cycle value.
603 class SMSchedule {
604 private:
605 /// Map from execution cycle to instructions.
606 DenseMap> ScheduledInstrs;
607
608 /// Map from instruction to execution cycle.
609 std::map InstrToCycle;
610
611 /// Map for each register and the max difference between its uses and def.
612 /// The first element in the pair is the max difference in stages. The
613 /// second is true if the register defines a Phi value and loop value is
614 /// scheduled before the Phi.
615 std::map> RegToStageDiff;
616
617 /// Keep track of the first cycle value in the schedule. It starts
618 /// as zero, but the algorithm allows negative values.
619 int FirstCycle = 0;
620
621 /// Keep track of the last cycle value in the schedule.
622 int LastCycle = 0;
623
624 /// The initiation interval (II) for the schedule.
625 int InitiationInterval = 0;
626
627 /// Target machine information.
628 const TargetSubtargetInfo &ST;
629
630 /// Virtual register information.
631 MachineRegisterInfo &MRI;
632
633 std::unique_ptr Resources;
634
635 public:
636 SMSchedule(MachineFunction *mf)
637 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
638 Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
639
640 void reset() {
641 ScheduledInstrs.clear();
642 InstrToCycle.clear();
643 RegToStageDiff.clear();
644 FirstCycle = 0;
645 LastCycle = 0;
646 InitiationInterval = 0;
647 }
648
649 /// Set the initiation interval for this schedule.
650 void setInitiationInterval(int ii) { InitiationInterval = ii; }
651
652 /// Return the first cycle in the completed schedule. This
653 /// can be a negative value.
654 int getFirstCycle() const { return FirstCycle; }
655
656 /// Return the last cycle in the finalized schedule.
657 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
658
659 /// Return the cycle of the earliest scheduled instruction in the dependence
660 /// chain.
661 int earliestCycleInChain(const SDep &Dep);
662
663 /// Return the cycle of the latest scheduled instruction in the dependence
664 /// chain.
665 int latestCycleInChain(const SDep &Dep);
666
667 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
668 int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
669 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
670
671 /// Iterators for the cycle to instruction map.
672 using sched_iterator = DenseMap>::iterator;
673 using const_sched_iterator =
674 DenseMap>::const_iterator;
675
676 /// Return true if the instruction is scheduled at the specified stage.
677 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
678 return (stageScheduled(SU) == (int)StageNum);
679 }
680
681 /// Return the stage for a scheduled instruction. Return -1 if
682 /// the instruction has not been scheduled.
683 int stageScheduled(SUnit *SU) const {
684 std::map::const_iterator it = InstrToCycle.find(SU);
685 if (it == InstrToCycle.end())
686 return -1;
687 return (it->second - FirstCycle) / InitiationInterval;
688 }
689
690 /// Return the cycle for a scheduled instruction. This function normalizes
691 /// the first cycle to be 0.
692 unsigned cycleScheduled(SUnit *SU) const {
693 std::map::const_iterator it = InstrToCycle.find(SU);
694 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
695 return (it->second - FirstCycle) % InitiationInterval;
696 }
697
698 /// Return the maximum stage count needed for this schedule.
699 unsigned getMaxStageCount() {
700 return (LastCycle - FirstCycle) / InitiationInterval;
701 }
702
703 /// Return the max. number of stages/iterations that can occur between a
704 /// register definition and its uses.
705 unsigned getStagesForReg(int Reg, unsigned CurStage) {
706 std::pair Stages = RegToStageDiff[Reg];
707 if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
708 return 1;
709 return Stages.first;
710 }
711
712 /// The number of stages for a Phi is a little different than other
713 /// instructions. The minimum value computed in RegToStageDiff is 1
714 /// because we assume the Phi is needed for at least 1 iteration.
715 /// This is not the case if the loop value is scheduled prior to the
716 /// Phi in the same stage. This function returns the number of stages
717 /// or iterations needed between the Phi definition and any uses.
718 unsigned getStagesForPhi(int Reg) {
719 std::pair Stages = RegToStageDiff[Reg];
720 if (Stages.second)
721 return Stages.first;
722 return Stages.first - 1;
723 }
724
725 /// Return the instructions that are scheduled at the specified cycle.
726 std::deque &getInstructions(int cycle) {
727 return ScheduledInstrs[cycle];
728 }
729
730 bool isValidSchedule(SwingSchedulerDAG *SSD);
731 void finalizeSchedule(SwingSchedulerDAG *SSD);
732 void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
733 std::deque &Insts);
734 bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
735 bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
736 MachineOperand &MO);
737 void print(raw_ostream &os) const;
738 void dump() const;
739 };
740
741 } // end anonymous namespace
151 } // end namespace llvm
742152
743153 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
744154 char MachinePipeliner::ID = 0;