llvm.org GIT mirror llvm / aaf44af
[LPM] Make LoopSimplify no longer a LoopPass and instead both a utility function and a FunctionPass. This has many benefits. The motivating use case was to be able to compute function analysis passes *after* running LoopSimplify (to avoid invalidating them) and then to run other passes which require LoopSimplify. Specifically, passes like unrolling and vectorization are critical to wire up to BranchProbabilityInfo and BlockFrequencyInfo so that they can be profile aware. For the LoopVectorize pass the only things in the way are LoopSimplify and LCSSA. This fixes LoopSimplify, and LCSSA is next on my list. There are also a bunch of other benefits of doing this: - It is now very feasible to make more passes *preserve* LoopSimplify because they can simply run it after changing a loop. Because subsequent passes can assume LoopSimplify is preserved, we can reduce the runs of this pass to the times when we actually mutate a loop structure. - The new pass manager should be able to more easily support loop passes factored in this way. - We can at long, long last observe that LoopSimplify is preserved across SCEV. This *halves* the number of times we run LoopSimplify!!! Now, getting here wasn't trivial. First off, the interfaces used by LoopSimplify are all over the map regarding how analyses are updated. We end up with weird "pass" parameters as a consequence. I'll try to clean at least some of this up later -- I'll have to have it all clean for the new pass manager. Next up I discovered a really frustrating bug. LoopUnroll *claims* to preserve LoopSimplify. That's actually a lie. But the way the LoopPassManager ends up running the passes, it always ran LoopSimplify on the unrolled-into loop, rectifying this oversight before any verification could kick in and point out that in fact nothing was preserved. So I've added code to the unroller to *actually* simplify the surrounding loop when it succeeds at unrolling. 
The only functional change in the test suite is that we now catch a case that was previously missed because SCEV and other loop transforms see their containing loops as simplified and thus don't miss some opportunities. One test case has been converted to check that we catch this case rather than checking that we miss it but at least don't get the wrong answer. Note that I have #if-ed out all of the verification logic in LoopSimplify! This is a temporary workaround while extracting these bits from the LoopPassManager. Currently, there is no way to have a pass in the LoopPassManager which preserves LoopSimplify along with one which does not. The LPM will try to verify on each loop in the nest that LoopSimplify holds but the now-Function-pass cannot distinguish what loop is being verified and so must try to verify all of them. The innermost loop is clearly no longer simplified as there is a pass which didn't even *attempt* to preserve it. =/ Once I get LCSSA out (and maybe LoopVectorize and some other fixes) I'll be able to re-enable this check and catch any places where we are still failing to preserve LoopSimplify. If this causes problems I can back this out and try to commit *all* of this at once, but so far this seems to work and allow much more incremental progress. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199884 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 6 years ago
6 changed file(s) with 484 addition(s) and 404 deletion(s). Raw diff Collapse all Expand all
1414 #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
1515
1616 namespace llvm {
17
17 class AliasAnalysis;
18 class BasicBlock;
19 class DominatorTree;
1820 class Loop;
21 class LoopInfo;
1922 class Pass;
23 class ScalarEvolution;
2024
2125 BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P);
26
27 /// \brief Simplify each loop in a loop nest recursively.
28 ///
29 /// This takes a potentially un-simplified loop L (and its children) and turns
30 /// it into a simplified loop nest with preheaders and single backedges. It
31 /// will optionally update \c AliasAnalysis and \c ScalarEvolution analyses if
32 /// passed into it.
33 bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
34 AliasAnalysis *AA = 0, ScalarEvolution *SE = 0);
2235
2336 }
2437
2020 class Loop;
2121 class LoopInfo;
2222 class LPPassManager;
23 class Pass;
2324
2425 bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
25 unsigned TripMultiple, LoopInfo* LI, LPPassManager* LPM);
26 unsigned TripMultiple, LoopInfo *LI, Pass *PP,
27 LPPassManager *LPM);
2628
2729 bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
2830 LPPassManager* LPM);
253253 }
254254
255255 // Unroll the loop.
256 if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
256 if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM))
257257 return false;
258258
259259 return true;
4141 #include "llvm/ADT/DepthFirstIterator.h"
4242 #include "llvm/ADT/SetOperations.h"
4343 #include "llvm/ADT/SetVector.h"
44 #include "llvm/ADT/SmallVector.h"
4445 #include "llvm/ADT/Statistic.h"
4546 #include "llvm/Analysis/AliasAnalysis.h"
4647 #include "llvm/Analysis/DependenceAnalysis.h"
4748 #include "llvm/Analysis/InstructionSimplify.h"
48 #include "llvm/Analysis/LoopPass.h"
49 #include "llvm/Analysis/LoopInfo.h"
4950 #include "llvm/Analysis/ScalarEvolution.h"
5051 #include "llvm/IR/Constants.h"
5152 #include "llvm/IR/Dominators.h"
6465 STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
6566 STATISTIC(NumNested , "Number of nested loops split out");
6667
67 namespace {
68 struct LoopSimplify : public LoopPass {
69 static char ID; // Pass identification, replacement for typeid
70 LoopSimplify() : LoopPass(ID) {
71 initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
72 }
73
74 // AA - If we have an alias analysis object to update, this is it, otherwise
75 // this is null.
76 AliasAnalysis *AA;
77 LoopInfo *LI;
78 DominatorTree *DT;
79 ScalarEvolution *SE;
80 Loop *L;
81 virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
82
83 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
84 // We need loop information to identify the loops...
85 AU.addRequired();
86 AU.addPreserved();
87
88 AU.addRequired();
89 AU.addPreserved();
90
91 AU.addPreserved();
92 AU.addPreserved();
93 AU.addPreserved();
94 AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
95 }
96
97 /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
98 void verifyAnalysis() const;
99
100 private:
101 bool ProcessLoop(Loop *L, LPPassManager &LPM);
102 BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
103 Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
104 BasicBlock *Preheader);
105 BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
106 };
107 }
108
109 static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
110 SmallVectorImpl &SplitPreds,
111 Loop *L);
112
113 char LoopSimplify::ID = 0;
114 INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
115 "Canonicalize natural loops", true, false)
116 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
117 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
118 INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
119 "Canonicalize natural loops", true, false)
120
121 // Publicly exposed interface to pass...
122 char &llvm::LoopSimplifyID = LoopSimplify::ID;
123 Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
124
125 /// runOnLoop - Run down all loops in the CFG (recursively, but we could do
126 /// it in any convenient order) inserting preheaders...
127 ///
128 bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
129 L = l;
130 bool Changed = false;
131 LI = &getAnalysis();
132 AA = getAnalysisIfAvailable();
133 DT = &getAnalysis().getDomTree();
134 SE = getAnalysisIfAvailable();
135
136 Changed |= ProcessLoop(L, LPM);
137
138 return Changed;
139 }
140
141 /// ProcessLoop - Walk the loop structure in depth first order, ensuring that
142 /// all loops have preheaders.
143 ///
144 bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
145 bool Changed = false;
146 ReprocessLoop:
147
148 // Check to see that no blocks (other than the header) in this loop have
149 // predecessors that are not in the loop. This is not valid for natural
150 // loops, but can occur if the blocks are unreachable. Since they are
151 // unreachable we can just shamelessly delete those CFG edges!
152 for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
153 BB != E; ++BB) {
154 if (*BB == L->getHeader()) continue;
155
156 SmallPtrSet BadPreds;
157 for (pred_iterator PI = pred_begin(*BB),
158 PE = pred_end(*BB); PI != PE; ++PI) {
159 BasicBlock *P = *PI;
160 if (!L->contains(P))
161 BadPreds.insert(P);
162 }
163
164 // Delete each unique out-of-loop (and thus dead) predecessor.
165 for (SmallPtrSet::iterator I = BadPreds.begin(),
166 E = BadPreds.end(); I != E; ++I) {
167
168 DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
169 << (*I)->getName() << "\n");
170
171 // Inform each successor of each dead pred.
172 for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
173 (*SI)->removePredecessor(*I);
174 // Zap the dead pred's terminator and replace it with unreachable.
175 TerminatorInst *TI = (*I)->getTerminator();
176 TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
177 (*I)->getTerminator()->eraseFromParent();
178 new UnreachableInst((*I)->getContext(), *I);
179 Changed = true;
180 }
181 }
182
183 // If there are exiting blocks with branches on undef, resolve the undef in
184 // the direction which will exit the loop. This will help simplify loop
185 // trip count computations.
186 SmallVector ExitingBlocks;
187 L->getExitingBlocks(ExitingBlocks);
188 for (SmallVectorImpl::iterator I = ExitingBlocks.begin(),
189 E = ExitingBlocks.end(); I != E; ++I)
190 if (BranchInst *BI = dyn_cast((*I)->getTerminator()))
191 if (BI->isConditional()) {
192 if (UndefValue *Cond = dyn_cast(BI->getCondition())) {
193
194 DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
195 << (*I)->getName() << "\n");
196
197 BI->setCondition(ConstantInt::get(Cond->getType(),
198 !L->contains(BI->getSuccessor(0))));
199
200 // This may make the loop analyzable, force SCEV recomputation.
201 if (SE)
202 SE->forgetLoop(L);
203
204 Changed = true;
205 }
206 }
207
208 // Does the loop already have a preheader? If so, don't insert one.
209 BasicBlock *Preheader = L->getLoopPreheader();
210 if (!Preheader) {
211 Preheader = InsertPreheaderForLoop(L, this);
212 if (Preheader) {
213 ++NumInserted;
214 Changed = true;
215 }
216 }
217
218 // Next, check to make sure that all exit nodes of the loop only have
219 // predecessors that are inside of the loop. This check guarantees that the
220 // loop preheader/header will dominate the exit blocks. If the exit block has
221 // predecessors from outside of the loop, split the edge now.
222 SmallVector ExitBlocks;
223 L->getExitBlocks(ExitBlocks);
224
225 SmallSetVector ExitBlockSet(ExitBlocks.begin(),
226 ExitBlocks.end());
227 for (SmallSetVector::iterator I = ExitBlockSet.begin(),
228 E = ExitBlockSet.end(); I != E; ++I) {
229 BasicBlock *ExitBlock = *I;
230 for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
231 PI != PE; ++PI)
232 // Must be exactly this loop: no subloops, parent loops, or non-loop preds
233 // allowed.
234 if (!L->contains(*PI)) {
235 if (RewriteLoopExitBlock(L, ExitBlock)) {
236 ++NumInserted;
237 Changed = true;
238 }
239 break;
240 }
241 }
242
243 // If the header has more than two predecessors at this point (from the
244 // preheader and from multiple backedges), we must adjust the loop.
245 BasicBlock *LoopLatch = L->getLoopLatch();
246 if (!LoopLatch) {
247 // If this is really a nested loop, rip it out into a child loop. Don't do
248 // this for loops with a giant number of backedges, just factor them into a
249 // common backedge instead.
250 if (L->getNumBackEdges() < 8) {
251 if (SeparateNestedLoop(L, LPM, Preheader)) {
252 ++NumNested;
253 // This is a big restructuring change, reprocess the whole loop.
254 Changed = true;
255 // GCC doesn't tail recursion eliminate this.
256 goto ReprocessLoop;
257 }
258 }
259
260 // If we either couldn't, or didn't want to, identify nesting of the loops,
261 // insert a new block that all backedges target, then make it jump to the
262 // loop header.
263 LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
264 if (LoopLatch) {
265 ++NumInserted;
266 Changed = true;
267 }
268 }
269
270 // Scan over the PHI nodes in the loop header. Since they now have only two
271 // incoming values (the loop is canonicalized), we may have simplified the PHI
272 // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
273 PHINode *PN;
274 for (BasicBlock::iterator I = L->getHeader()->begin();
275 (PN = dyn_cast(I++)); )
276 if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
277 if (AA) AA->deleteValue(PN);
278 if (SE) SE->forgetValue(PN);
279 PN->replaceAllUsesWith(V);
280 PN->eraseFromParent();
281 }
282
283 // If this loop has multiple exits and the exits all go to the same
284 // block, attempt to merge the exits. This helps several passes, such
285 // as LoopRotation, which do not support loops with multiple exits.
286 // SimplifyCFG also does this (and this code uses the same utility
287 // function), however this code is loop-aware, where SimplifyCFG is
288 // not. That gives it the advantage of being able to hoist
289 // loop-invariant instructions out of the way to open up more
290 // opportunities, and the disadvantage of having the responsibility
291 // to preserve dominator information.
292 bool UniqueExit = true;
293 if (!ExitBlocks.empty())
294 for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
295 if (ExitBlocks[i] != ExitBlocks[0]) {
296 UniqueExit = false;
297 break;
298 }
299 if (UniqueExit) {
300 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
301 BasicBlock *ExitingBlock = ExitingBlocks[i];
302 if (!ExitingBlock->getSinglePredecessor()) continue;
303 BranchInst *BI = dyn_cast(ExitingBlock->getTerminator());
304 if (!BI || !BI->isConditional()) continue;
305 CmpInst *CI = dyn_cast(BI->getCondition());
306 if (!CI || CI->getParent() != ExitingBlock) continue;
307
308 // Attempt to hoist out all instructions except for the
309 // comparison and the branch.
310 bool AllInvariant = true;
311 bool AnyInvariant = false;
312 for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
313 Instruction *Inst = I++;
314 // Skip debug info intrinsics.
315 if (isa(Inst))
316 continue;
317 if (Inst == CI)
318 continue;
319 if (!L->makeLoopInvariant(Inst, AnyInvariant,
320 Preheader ? Preheader->getTerminator() : 0)) {
321 AllInvariant = false;
322 break;
323 }
324 }
325 if (AnyInvariant) {
326 Changed = true;
327 // The loop disposition of all SCEV expressions that depend on any
328 // hoisted values have also changed.
329 if (SE)
330 SE->forgetLoopDispositions(L);
331 }
332 if (!AllInvariant) continue;
333
334 // The block has now been cleared of all instructions except for
335 // a comparison and a conditional branch. SimplifyCFG may be able
336 // to fold it now.
337 if (!FoldBranchToCommonDest(BI)) continue;
338
339 // Success. The block is now dead, so remove it from the loop,
340 // update the dominator tree and delete it.
341 DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
342 << ExitingBlock->getName() << "\n");
343
344 // Notify ScalarEvolution before deleting this block. Currently assume the
345 // parent loop doesn't change (spliting edges doesn't count). If blocks,
346 // CFG edges, or other values in the parent loop change, then we need call
347 // to forgetLoop() for the parent instead.
348 if (SE)
349 SE->forgetLoop(L);
350
351 assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
352 Changed = true;
353 LI->removeBlock(ExitingBlock);
354
355 DomTreeNode *Node = DT->getNode(ExitingBlock);
356 const std::vector *> &Children =
357 Node->getChildren();
358 while (!Children.empty()) {
359 DomTreeNode *Child = Children.front();
360 DT->changeImmediateDominator(Child, Node->getIDom());
361 }
362 DT->eraseNode(ExitingBlock);
363
364 BI->getSuccessor(0)->removePredecessor(ExitingBlock);
365 BI->getSuccessor(1)->removePredecessor(ExitingBlock);
366 ExitingBlock->eraseFromParent();
367 }
368 }
369
370 return Changed;
68 // If the block isn't already, move the new block to right after some 'outside
69 // block' block. This prevents the preheader from being placed inside the loop
70 // body, e.g. when the loop hasn't been rotated.
71 static void placeSplitBlockCarefully(BasicBlock *NewBB,
72 SmallVectorImpl &SplitPreds,
73 Loop *L) {
74 // Check to see if NewBB is already well placed.
75 Function::iterator BBI = NewBB; --BBI;
76 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
77 if (&*BBI == SplitPreds[i])
78 return;
79 }
80
81 // If it isn't already after an outside block, move it after one. This is
82 // always good as it makes the uncond branch from the outside block into a
83 // fall-through.
84
85 // Figure out *which* outside block to put this after. Prefer an outside
86 // block that neighbors a BB actually in the loop.
87 BasicBlock *FoundBB = 0;
88 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
89 Function::iterator BBI = SplitPreds[i];
90 if (++BBI != NewBB->getParent()->end() &&
91 L->contains(BBI)) {
92 FoundBB = SplitPreds[i];
93 break;
94 }
95 }
96
97 // If our heuristic for a *good* bb to place this after doesn't find
98 // anything, just pick something. It's likely better than leaving it within
99 // the loop.
100 if (!FoundBB)
101 FoundBB = SplitPreds[0];
102 NewBB->moveAfter(FoundBB);
371103 }
372104
373105 /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
412144
413145 // Make sure that NewBB is put someplace intelligent, which doesn't mess up
414146 // code layout too horribly.
415 PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
147 placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
416148
417149 return PreheaderBB;
418150 }
419151
420 /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
421 /// blocks. This method is used to split exit blocks that have predecessors
422 /// outside of the loop.
423 BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
152 /// \brief Ensure that the loop preheader dominates all exit blocks.
153 ///
154 /// This method is used to split exit blocks that have predecessors outside of
155 /// the loop.
156 static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
424157 SmallVector LoopBlocks;
425158 for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
426159 BasicBlock *P = *I;
440173 SplitLandingPadPredecessors(Exit, ArrayRef(&LoopBlocks[0],
441174 LoopBlocks.size()),
442175 ".loopexit", ".nonloopexit",
443 this, NewBBs);
176 PP, NewBBs);
444177 NewExitBB = NewBBs[0];
445178 } else {
446 NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
179 NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
447180 }
448181
449182 DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
451184 return NewExitBB;
452185 }
453186
454 /// AddBlockAndPredsToSet - Add the specified block, and all of its
455 /// predecessors, to the specified set, if it's not already in there. Stop
456 /// predecessor traversal when we reach StopBlock.
457 static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
187 /// Add the specified block, and all of its predecessors, to the specified set,
188 /// if it's not already in there. Stop predecessor traversal when we reach
189 /// StopBlock.
190 static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
458191 std::set &Blocks) {
459 std::vector WorkList;
460 WorkList.push_back(InputBB);
192 SmallVector Worklist;
193 Worklist.push_back(InputBB);
461194 do {
462 BasicBlock *BB = WorkList.back(); WorkList.pop_back();
195 BasicBlock *BB = Worklist.pop_back_val();
463196 if (Blocks.insert(BB).second && BB != StopBlock)
464197 // If BB is not already processed and it is not a stop block then
465198 // insert its predecessor in the work list
466199 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
467200 BasicBlock *WBB = *I;
468 WorkList.push_back(WBB);
469 }
470 } while(!WorkList.empty());
471 }
472
473 /// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
474 /// PHI node that tells us how to partition the loops.
475 static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
476 AliasAnalysis *AA, LoopInfo *LI) {
201 Worklist.push_back(WBB);
202 }
203 } while (!Worklist.empty());
204 }
205
206 /// \brief The first part of loop-nestification is to find a PHI node that tells
207 /// us how to partition the loops.
208 static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
209 DominatorTree *DT) {
477210 for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) {
478211 PHINode *PN = cast(I);
479212 ++I;
495228 return 0;
496229 }
497230
498 // PlaceSplitBlockCarefully - If the block isn't already, move the new block to
499 // right after some 'outside block' block. This prevents the preheader from
500 // being placed inside the loop body, e.g. when the loop hasn't been rotated.
501 void PlaceSplitBlockCarefully(BasicBlock *NewBB,
502 SmallVectorImpl &SplitPreds,
503 Loop *L) {
504 // Check to see if NewBB is already well placed.
505 Function::iterator BBI = NewBB; --BBI;
506 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
507 if (&*BBI == SplitPreds[i])
508 return;
509 }
510
511 // If it isn't already after an outside block, move it after one. This is
512 // always good as it makes the uncond branch from the outside block into a
513 // fall-through.
514
515 // Figure out *which* outside block to put this after. Prefer an outside
516 // block that neighbors a BB actually in the loop.
517 BasicBlock *FoundBB = 0;
518 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
519 Function::iterator BBI = SplitPreds[i];
520 if (++BBI != NewBB->getParent()->end() &&
521 L->contains(BBI)) {
522 FoundBB = SplitPreds[i];
523 break;
524 }
525 }
526
527 // If our heuristic for a *good* bb to place this after doesn't find
528 // anything, just pick something. It's likely better than leaving it within
529 // the loop.
530 if (!FoundBB)
531 FoundBB = SplitPreds[0];
532 NewBB->moveAfter(FoundBB);
533 }
534
535
536 /// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
537 /// them out into a nested loop. This is important for code that looks like
231 /// \brief If this loop has multiple backedges, try to pull one of them out into
232 /// a nested loop.
233 ///
234 /// This is important for code that looks like
538235 /// this:
539236 ///
540237 /// Loop:
550247 /// If we are able to separate out a loop, return the new outer loop that was
551248 /// created.
552249 ///
553 Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
554 BasicBlock *Preheader) {
250 static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
251 AliasAnalysis *AA, DominatorTree *DT,
252 LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
555253 // Don't try to separate loops without a preheader.
556254 if (!Preheader)
557255 return 0;
560258 assert(!L->getHeader()->isLandingPad() &&
561259 "Can't insert backedge to landing pad");
562260
563 PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
261 PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
564262 if (PN == 0) return 0; // No known way to partition.
565263
566264 // Pull out all predecessors that have varying values in the loop. This
586284
587285 BasicBlock *Header = L->getHeader();
588286 BasicBlock *NewBB =
589 SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
287 SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
590288
591289 // Make sure that NewBB is put someplace intelligent, which doesn't mess up
592290 // code layout too horribly.
593 PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
291 placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
594292
595293 // Create the new outer loop.
596294 Loop *NewOuter = new Loop();
603301
604302 // L is now a subloop of our outer loop.
605303 NewOuter->addChildLoop(L);
606
607 // Add the new loop to the pass manager queue.
608 LPM.insertLoopIntoQueue(NewOuter);
609304
610305 for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
611306 I != E; ++I)
621316 for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
622317 BasicBlock *P = *PI;
623318 if (DT->dominates(Header, P))
624 AddBlockAndPredsToSet(P, Header, BlocksInL);
319 addBlockAndPredsToSet(P, Header, BlocksInL);
625320 }
626321
627322 // Scan all of the loop children of L, moving them to OuterLoop if they are
649344 return NewOuter;
650345 }
651346
652
653
654 /// InsertUniqueBackedgeBlock - This method is called when the specified loop
655 /// has more than one backedge in it. If this occurs, revector all of these
656 /// backedges to target a new basic block and have that block branch to the loop
657 /// header. This ensures that loops have exactly one backedge.
658 ///
659 BasicBlock *
660 LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
347 /// \brief This method is called when the specified loop has more than one
348 /// backedge in it.
349 ///
350 /// If this occurs, revector all of these backedges to target a new basic block
351 /// and have that block branch to the loop header. This ensures that loops
352 /// have exactly one backedge.
353 static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
354 AliasAnalysis *AA,
355 DominatorTree *DT, LoopInfo *LI) {
661356 assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
662357
663358 // Get information about the loop
768463 return BEBlock;
769464 }
770465
771 void LoopSimplify::verifyAnalysis() const {
466 /// \brief Simplify one loop and queue further loops for simplification.
467 ///
468 /// FIXME: Currently this accepts both lots of analyses that it uses and a raw
469 /// Pass pointer. The Pass pointer is used by numerous utilities to update
470 /// specific analyses. Rather than a pass it would be much cleaner and more
471 /// explicit if they accepted the analysis directly and then updated it.
472 static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist,
473 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
474 ScalarEvolution *SE, Pass *PP) {
475 bool Changed = false;
476 ReprocessLoop:
477
478 // Check to see that no blocks (other than the header) in this loop have
479 // predecessors that are not in the loop. This is not valid for natural
480 // loops, but can occur if the blocks are unreachable. Since they are
481 // unreachable we can just shamelessly delete those CFG edges!
482 for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
483 BB != E; ++BB) {
484 if (*BB == L->getHeader()) continue;
485
486 SmallPtrSet BadPreds;
487 for (pred_iterator PI = pred_begin(*BB),
488 PE = pred_end(*BB); PI != PE; ++PI) {
489 BasicBlock *P = *PI;
490 if (!L->contains(P))
491 BadPreds.insert(P);
492 }
493
494 // Delete each unique out-of-loop (and thus dead) predecessor.
495 for (SmallPtrSet::iterator I = BadPreds.begin(),
496 E = BadPreds.end(); I != E; ++I) {
497
498 DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
499 << (*I)->getName() << "\n");
500
501 // Inform each successor of each dead pred.
502 for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
503 (*SI)->removePredecessor(*I);
504 // Zap the dead pred's terminator and replace it with unreachable.
505 TerminatorInst *TI = (*I)->getTerminator();
506 TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
507 (*I)->getTerminator()->eraseFromParent();
508 new UnreachableInst((*I)->getContext(), *I);
509 Changed = true;
510 }
511 }
512
513 // If there are exiting blocks with branches on undef, resolve the undef in
514 // the direction which will exit the loop. This will help simplify loop
515 // trip count computations.
516 SmallVector ExitingBlocks;
517 L->getExitingBlocks(ExitingBlocks);
518 for (SmallVectorImpl::iterator I = ExitingBlocks.begin(),
519 E = ExitingBlocks.end(); I != E; ++I)
520 if (BranchInst *BI = dyn_cast((*I)->getTerminator()))
521 if (BI->isConditional()) {
522 if (UndefValue *Cond = dyn_cast(BI->getCondition())) {
523
524 DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
525 << (*I)->getName() << "\n");
526
527 BI->setCondition(ConstantInt::get(Cond->getType(),
528 !L->contains(BI->getSuccessor(0))));
529
530 // This may make the loop analyzable, force SCEV recomputation.
531 if (SE)
532 SE->forgetLoop(L);
533
534 Changed = true;
535 }
536 }
537
538 // Does the loop already have a preheader? If so, don't insert one.
539 BasicBlock *Preheader = L->getLoopPreheader();
540 if (!Preheader) {
541 Preheader = InsertPreheaderForLoop(L, PP);
542 if (Preheader) {
543 ++NumInserted;
544 Changed = true;
545 }
546 }
547
548 // Next, check to make sure that all exit nodes of the loop only have
549 // predecessors that are inside of the loop. This check guarantees that the
550 // loop preheader/header will dominate the exit blocks. If the exit block has
551 // predecessors from outside of the loop, split the edge now.
552 SmallVector ExitBlocks;
553 L->getExitBlocks(ExitBlocks);
554
555 SmallSetVector ExitBlockSet(ExitBlocks.begin(),
556 ExitBlocks.end());
557 for (SmallSetVector::iterator I = ExitBlockSet.begin(),
558 E = ExitBlockSet.end(); I != E; ++I) {
559 BasicBlock *ExitBlock = *I;
560 for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
561 PI != PE; ++PI)
562 // Must be exactly this loop: no subloops, parent loops, or non-loop preds
563 // allowed.
564 if (!L->contains(*PI)) {
565 if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
566 ++NumInserted;
567 Changed = true;
568 }
569 break;
570 }
571 }
572
573 // If the header has more than two predecessors at this point (from the
574 // preheader and from multiple backedges), we must adjust the loop.
575 BasicBlock *LoopLatch = L->getLoopLatch();
576 if (!LoopLatch) {
577 // If this is really a nested loop, rip it out into a child loop. Don't do
578 // this for loops with a giant number of backedges, just factor them into a
579 // common backedge instead.
580 if (L->getNumBackEdges() < 8) {
581 if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) {
582 ++NumNested;
583 // Enqueue the outer loop as it should be processed next in our
584 // depth-first nest walk.
585 Worklist.push_back(OuterL);
586
587 // This is a big restructuring change, reprocess the whole loop.
588 Changed = true;
589 // GCC doesn't tail recursion eliminate this.
590 // FIXME: It isn't clear we can't rely on LLVM to TRE this.
591 goto ReprocessLoop;
592 }
593 }
594
595 // If we either couldn't, or didn't want to, identify nesting of the loops,
596 // insert a new block that all backedges target, then make it jump to the
597 // loop header.
598 LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
599 if (LoopLatch) {
600 ++NumInserted;
601 Changed = true;
602 }
603 }
604
605 // Scan over the PHI nodes in the loop header. Since they now have only two
606 // incoming values (the loop is canonicalized), we may have simplified the PHI
607 // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
608 PHINode *PN;
609 for (BasicBlock::iterator I = L->getHeader()->begin();
610 (PN = dyn_cast(I++)); )
611 if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
612 if (AA) AA->deleteValue(PN);
613 if (SE) SE->forgetValue(PN);
614 PN->replaceAllUsesWith(V);
615 PN->eraseFromParent();
616 }
617
618 // If this loop has multiple exits and the exits all go to the same
619 // block, attempt to merge the exits. This helps several passes, such
620 // as LoopRotation, which do not support loops with multiple exits.
621 // SimplifyCFG also does this (and this code uses the same utility
622 // function), however this code is loop-aware, where SimplifyCFG is
623 // not. That gives it the advantage of being able to hoist
624 // loop-invariant instructions out of the way to open up more
625 // opportunities, and the disadvantage of having the responsibility
626 // to preserve dominator information.
627 bool UniqueExit = true;
628 if (!ExitBlocks.empty())
629 for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
630 if (ExitBlocks[i] != ExitBlocks[0]) {
631 UniqueExit = false;
632 break;
633 }
634 if (UniqueExit) {
635 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
636 BasicBlock *ExitingBlock = ExitingBlocks[i];
637 if (!ExitingBlock->getSinglePredecessor()) continue;
638 BranchInst *BI = dyn_cast(ExitingBlock->getTerminator());
639 if (!BI || !BI->isConditional()) continue;
640 CmpInst *CI = dyn_cast(BI->getCondition());
641 if (!CI || CI->getParent() != ExitingBlock) continue;
642
643 // Attempt to hoist out all instructions except for the
644 // comparison and the branch.
645 bool AllInvariant = true;
646 bool AnyInvariant = false;
647 for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
648 Instruction *Inst = I++;
649 // Skip debug info intrinsics.
650 if (isa(Inst))
651 continue;
652 if (Inst == CI)
653 continue;
654 if (!L->makeLoopInvariant(Inst, AnyInvariant,
655 Preheader ? Preheader->getTerminator() : 0)) {
656 AllInvariant = false;
657 break;
658 }
659 }
660 if (AnyInvariant) {
661 Changed = true;
662 // The loop disposition of all SCEV expressions that depend on any
663 // hoisted values have also changed.
664 if (SE)
665 SE->forgetLoopDispositions(L);
666 }
667 if (!AllInvariant) continue;
668
669 // The block has now been cleared of all instructions except for
670 // a comparison and a conditional branch. SimplifyCFG may be able
671 // to fold it now.
672 if (!FoldBranchToCommonDest(BI)) continue;
673
674 // Success. The block is now dead, so remove it from the loop,
675 // update the dominator tree and delete it.
676 DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
677 << ExitingBlock->getName() << "\n");
678
679 // Notify ScalarEvolution before deleting this block. Currently assume the
680 // parent loop doesn't change (splitting edges doesn't count). If blocks,
681 // CFG edges, or other values in the parent loop change, then we need to call
682 // forgetLoop() for the parent instead.
683 if (SE)
684 SE->forgetLoop(L);
685
686 assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
687 Changed = true;
688 LI->removeBlock(ExitingBlock);
689
690 DomTreeNode *Node = DT->getNode(ExitingBlock);
691 const std::vector *> &Children =
692 Node->getChildren();
693 while (!Children.empty()) {
694 DomTreeNode *Child = Children.front();
695 DT->changeImmediateDominator(Child, Node->getIDom());
696 }
697 DT->eraseNode(ExitingBlock);
698
699 BI->getSuccessor(0)->removePredecessor(ExitingBlock);
700 BI->getSuccessor(1)->removePredecessor(ExitingBlock);
701 ExitingBlock->eraseFromParent();
702 }
703 }
704
705 return Changed;
706 }
707
// simplifyLoop - Run simplifyOneLoop over every loop in the nest rooted at L.
// Returns true if any of those calls reported a change.
//
// DT, LI, PP, AA and SE are not used directly in this function; they are
// forwarded unchanged to simplifyOneLoop for each loop in the nest.
708 bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
709 AliasAnalysis *AA, ScalarEvolution *SE) {
710 bool Changed = false;
711
712 // Worklist maintains our depth-first queue of loops in this nest to process.
713 SmallVector Worklist;
714 Worklist.push_back(L);
715
716 // Walk the worklist from front to back, pushing newly found sub loops onto
717 // the back. This will let us process loops from back to front in depth-first
718 // order. We can use this simple process because loops form a tree.
// Worklist.size() is re-read on every iteration because the loop body grows
// the worklist while it is being walked.
719 for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
720 Loop *L2 = Worklist[Idx];
721 for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
722 Worklist.push_back(*I);
723 }
724
// Drain from the back: every sub loop sits at a later index than its parent,
// so it is handed to simplifyOneLoop before the loop that contains it. The
// worklist itself is passed along too, presumably so simplifyOneLoop can
// enqueue additional loops -- confirm against its definition.
725 while (!Worklist.empty())
726 Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, SE, PP);
727
728 return Changed;
729 }
730
731 namespace {
// LoopSimplify - FunctionPass whose runOnFunction calls llvm::simplifyLoop on
// the loops reported by LoopInfo.
732 struct LoopSimplify : public FunctionPass {
733 static char ID; // Pass identification, replacement for typeid
// Constructing an instance also runs the pass's registration hook against the
// global PassRegistry.
// NOTE(review): the constructor leaves AA, DT, LI and SE uninitialized; they
// are only assigned in runOnFunction.
734 LoopSimplify() : FunctionPass(ID) {
735 initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
736 }
737
738 // AA - If we have an alias analysis object to update, this is it, otherwise
739 // this is null.
740 AliasAnalysis *AA;
// DT, LI - Assigned in runOnFunction from getAnalysis results.
741 DominatorTree *DT;
742 LoopInfo *LI;
// SE - Assigned in runOnFunction via getAnalysisIfAvailable; presumably null
// when ScalarEvolution is not available -- confirm.
743 ScalarEvolution *SE;
744
745 virtual bool runOnFunction(Function &F);
746
// Declares which analyses this pass requires and which it preserves;
// BreakCriticalEdgesID is additionally marked preserved by ID.
747 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
748 // We need loop information to identify the loops...
749 AU.addRequired();
750 AU.addPreserved();
751
752 AU.addRequired();
753 AU.addPreserved();
754
755 AU.addPreserved();
756 AU.addPreserved();
757 AU.addPreserved();
758 AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
759 }
760
761 /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
// NOTE(review): the checks in the definition are currently compiled out with
// #if 0, so calling this does nothing at the moment.
762 void verifyAnalysis() const;
763
764 private:
// NOTE(review): nothing in the visible code calls these four members; the
// simplification code calls the free functions rewriteLoopExitBlock,
// separateNestedLoop and insertUniqueBackedgeBlock instead. They look like
// leftovers from the LoopPass version -- confirm they have no definitions
// elsewhere in the file and remove them.
765 bool ProcessLoop(Loop *L);
766 BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
767 Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader);
768 BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
769 };
770 }
771
// Out-of-line definition of the pass ID declared in the struct. Presumably
// only its address is significant (LoopSimplifyID below binds a reference to
// it) -- confirm against the pass infrastructure.
772 char LoopSimplify::ID = 0;
// Register the pass under the name "loop-simplify" with the description
// "Canonicalize natural loops", listing DominatorTreeWrapperPass and LoopInfo
// as dependencies.
// NOTE(review): the meaning of the trailing `true, false` flags is defined by
// the INITIALIZE_PASS macros, not visible here.
773 INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
774 "Canonicalize natural loops", true, false)
775 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
776 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
777 INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
778 "Canonicalize natural loops", true, false)
779
780 // Publicly exposed interface to pass...
// LoopSimplifyID aliases LoopSimplify::ID so that code outside this file can
// name the pass without seeing the struct, which lives in an anonymous
// namespace.
781 char &llvm::LoopSimplifyID = LoopSimplify::ID;
// Returns a heap-allocated LoopSimplify instance; the caller is presumably
// responsible for its lifetime (e.g. by handing it to a pass manager) --
// confirm.
782 Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
783
784 /// runOnFunction - Refresh the cached analysis pointers, then run
785 /// simplifyLoop on each loop that LoopInfo iterates over. Returns true if any
786 /// simplifyLoop call reported a change.
787 bool LoopSimplify::runOnFunction(Function &F) {
788 bool Changed = false;
// AA and SE are looked up with getAnalysisIfAvailable, i.e. they are optional;
// LI and DT are fetched with getAnalysis.
789 AA = getAnalysisIfAvailable();
790 LI = &getAnalysis();
791 DT = &getAnalysis().getDomTree();
792 SE = getAnalysisIfAvailable();
793
794 // Simplify each loop nest in the function.
// `this` is passed as the Pass argument of simplifyLoop.
795 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
796 Changed |= simplifyLoop(*I, DT, LI, this, AA, SE);
797
798 return Changed;
799 }
800
801 // FIXME: Restore this code when we re-enable verification in verifyAnalysis
802 // below.
803 #if 0
804 static void verifyLoop(Loop *L) {
805 // Verify subloops.
806 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
807 verifyLoop(*I);
808
772809 // It used to be possible to just assert L->isLoopSimplifyForm(), however
773810 // with the introduction of indirectbr, there are now cases where it's
774811 // not possible to transform a loop as necessary. We can at least check
805842 (void)HasIndBrExiting;
806843 }
807844 }
845 #endif
846
// verifyAnalysis - Currently does nothing: the only statements in the body
// are compiled out by the #if 0 / #endif pair below.
847 void LoopSimplify::verifyAnalysis() const {
848 // FIXME: This routine is being called mid-way through the loop pass manager
849 // as loop passes destroy this analysis. That's actually fine, but we have no
850 // way of expressing that here. Once all of the passes that destroy this are
851 // hoisted out of the loop pass manager we can add back verification here.
852 #if 0
// When re-enabled, this would call verifyLoop on every loop LI iterates over.
853 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
854 verifyLoop(*I);
855 #endif
856 }
2929 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
3030 #include "llvm/Transforms/Utils/Cloning.h"
3131 #include "llvm/Transforms/Utils/Local.h"
32 #include "llvm/Transforms/Utils/LoopUtils.h"
3233 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
3334 using namespace llvm;
3435
137138 /// removed from the LoopPassManager as well. LPM can also be NULL.
138139 ///
139140 /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
140 /// available it must also preserve those analyses.
141 /// available from the Pass it must also preserve those analyses.
141142 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
142143 bool AllowRuntime, unsigned TripMultiple,
143 LoopInfo *LI, LPPassManager *LPM) {
144 LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
144145 BasicBlock *Preheader = L->getLoopPreheader();
145146 if (!Preheader) {
146147 DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
208209
209210 // Notify ScalarEvolution that the loop will be substantially changed,
210211 // if not outright eliminated.
211 if (LPM) {
212 ScalarEvolution *SE = LPM->getAnalysisIfAvailable();
212 if (PP) {
213 ScalarEvolution *SE = PP->getAnalysisIfAvailable();
213214 if (SE)
214215 SE->forgetLoop(L);
215216 }
409410 }
410411 }
411412
412 if (LPM) {
413 DominatorTree *DT = 0;
414 if (PP) {
413415 // FIXME: Reconstruct dom info, because it is not preserved properly.
414416 // Incrementally updating domtree after loop unrolling would be easy.
415417 if (DominatorTreeWrapperPass *DTWP =
416 LPM->getAnalysisIfAvailable())
417 DTWP->getDomTree().recalculate(*L->getHeader()->getParent());
418 PP->getAnalysisIfAvailable()) {
419 DT = &DTWP->getDomTree();
420 DT->recalculate(*L->getHeader()->getParent());
421 }
418422
419423 // Simplify any new induction variables in the partially unrolled loop.
420 ScalarEvolution *SE = LPM->getAnalysisIfAvailable();
424 ScalarEvolution *SE = PP->getAnalysisIfAvailable();
421425 if (SE && !CompletelyUnroll) {
422426 SmallVector DeadInsts;
423427 simplifyLoopIVs(L, SE, LPM, DeadInsts);
450454
451455 NumCompletelyUnrolled += CompletelyUnroll;
452456 ++NumUnrolled;
457
458 Loop *OuterL = L->getParentLoop();
453459 // Remove the loop from the LoopPassManager if it's completely removed.
454460 if (CompletelyUnroll && LPM != NULL)
455461 LPM->deleteLoopFromQueue(L);
456462
463 // If we have a pass and a DominatorTree we should re-simplify impacted loops
464 // to ensure subsequent analyses can rely on this form. We want to simplify
465 // at least one layer outside of the loop that was unrolled so that any
466 // changes to the parent loop exposed by the unrolling are considered.
467 if (PP && DT) {
468 if (!OuterL && !CompletelyUnroll)
469 OuterL = L;
470 if (OuterL)
471 simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0,
472 PP->getAnalysisIfAvailable());
473 }
474
457475 return true;
458476 }
3737 ret void
3838 }
3939
40 ; It would be nice if SCEV and any loop analysis could assume that
41 ; preheaders exist. Unfortunately it is not always the case. This test
42 ; checks that SCEVExpander can handle an outer loop that has not yet
43 ; been simplified. As a result, the inner loop's exit test will not be
44 ; rewritten.
40 ; This test checks that SCEVExpander can handle an outer loop that has been
41 ; simplified, and as a result the inner loop's exit test will be rewritten.
4542 define void @expandOuterRecurrence(i32 %arg) nounwind {
4643 entry:
4744 %sub1 = sub nsw i32 %arg, 1
4845 %cmp1 = icmp slt i32 0, %sub1
4946 br i1 %cmp1, label %outer, label %exit
5047
48 ; CHECK: outer:
49 ; CHECK: icmp slt
5150 outer:
5251 %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
5352 %sub2 = sub nsw i32 %arg, %i
5958 br label %inner
6059
6160 ; CHECK: inner:
62 ; CHECK: icmp slt
6361 ; CHECK: br i1
6462 inner:
6563 %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]