llvm.org GIT mirror llvm / e69c6a9
[LICM] Make Loop ICM profile aware Summary: Hoisting/sinking instruction out of a loop isn't always beneficial. Hoisting an instruction from a cold block inside a loop body out of the loop could hurt performance. This change makes Loop ICM profile aware - it now checks block frequency to make sure hoisting/sinking only moves instruction to colder block. Test Plan: ninja check Reviewers: asbirlea, sanjoy, reames, nikic, hfinkel, vsk Reviewed By: asbirlea Subscribers: fhahn, vsk, davidxl, xbolva00, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65060 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368526 91177308-0d34-0410-b5e6-96231b3b80d8 Wenlei He a month ago
7 changed file(s) with 120 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
3636 class AliasSet;
3737 class AliasSetTracker;
3838 class BasicBlock;
39 class BlockFrequencyInfo;
3940 class DataLayout;
4041 class Loop;
4142 class LoopInfo;
113114 /// reverse depth first order w.r.t the DominatorTree. This allows us to visit
114115 /// uses before definitions, allowing us to sink a loop body in one pass without
115116 /// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
116 /// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
117 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
117118 /// instructions of the loop and loop safety information as
118119 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
119120 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
120 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
121 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
121 BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *,
122 Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
122123 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
123124
124125 /// Walk the specified region of the CFG (defined by all blocks
125126 /// dominated by the specified block, and that are in the current loop) in depth
126127 /// first order w.r.t the DominatorTree. This allows us to visit definitions
127128 /// before uses, allowing us to hoist a loop body in one pass without iteration.
128 /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
129 /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, BlockFrequencyInfo,
129130 /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
130131 /// loop and loop safety information as arguments. Diagnostics is emitted via \p
131132 /// ORE. It returns changed status.
132133 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
133 TargetLibraryInfo *, Loop *, AliasSetTracker *,
134 MemorySSAUpdater *, ICFLoopSafetyInfo *,
135 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
134 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, AliasSetTracker *,
135 MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
136 OptimizationRemarkEmitter *);
136137
137138 /// This function deletes dead loops. The caller of this function needs to
137138 /// guarantee that the loop is in fact dead.
9494 "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
9595 cl::desc("Enable control flow (and PHI) hoisting in LICM"));
9696
97 static cl::opt HoistSinkColdnessThreshold(
98 "licm-coldness-threshold", cl::Hidden, cl::init(4),
99 cl::desc("Relative coldness Threshold of hoisting/sinking destination "
100 "block for LICM to be considered beneficial"));
101
97102 static cl::opt MaxNumUsesTraversed(
98103 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
99104 cl::desc("Max num uses visited for identifying load "
138143 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
139144 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
140145 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
141 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
142 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
146 BlockFrequencyInfo *BFI, const Loop *CurLoop,
147 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
148 OptimizationRemarkEmitter *ORE);
143149 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
144150 const DominatorTree *DT,
145151 const Loop *CurLoop,
167173 struct LoopInvariantCodeMotion {
168174 using ASTrackerMapTy = DenseMap>;
169175 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
170 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
171 ScalarEvolution *SE, MemorySSA *MSSA,
176 BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
177 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
172178 OptimizationRemarkEmitter *ORE, bool DeleteAST);
173179
174180 ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
219225 &getAnalysis().getAAResults(),
220226 &getAnalysis().getLoopInfo(),
221227 &getAnalysis().getDomTree(),
228 &getAnalysis().getBFI(),
222229 &getAnalysis().getTLI(),
223230 &getAnalysis().getTTI(
224231 *L->getHeader()->getParent()),
229236 /// loop preheaders be inserted into the CFG...
230237 ///
231238 void getAnalysisUsage(AnalysisUsage &AU) const override {
239 AU.addRequired();
232240 AU.addPreserved();
233241 AU.addPreserved();
234242 AU.addRequired();
285293 "cached at a higher level");
286294
287295 LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
288 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
296 auto BFI = FAM.getCachedResult(*F);
297 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, BFI, &AR.TLI, &AR.TTI, &AR.SE,
289298 AR.MSSA, ORE, true))
290299 return PreservedAnalyses::all();
291300
323332 ///
324333 bool LoopInvariantCodeMotion::runOnLoop(
325334 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
326 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
327 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
335 BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
336 ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
337 bool DeleteAST) {
328338 bool Changed = false;
329339
330340 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
384394 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
385395 /*IsSink=*/true};
386396 if (L->hasDedicatedExits())
387 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
397 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
388398 CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
389399 Flags.IsSink = false;
390400 if (Preheader)
391 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
401 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
392402 CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
393403
394404 // Now that all loop invariants have been removed from the loop, promote any
490500 /// definitions, allowing us to sink a loop body in one pass without iteration.
491501 ///
492502 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
493 DominatorTree *DT, TargetLibraryInfo *TLI,
494 TargetTransformInfo *TTI, Loop *CurLoop,
495 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
503 DominatorTree *DT, BlockFrequencyInfo *BFI,
504 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
505 Loop *CurLoop, AliasSetTracker *CurAST,
506 MemorySSAUpdater *MSSAU,
496507 ICFLoopSafetyInfo *SafetyInfo,
497508 SinkAndHoistLICMFlags &Flags,
498509 OptimizationRemarkEmitter *ORE) {
541552 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
542553 ORE) &&
543554 !I.mayHaveSideEffects()) {
544 if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
555 if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
545556 if (!FreeInLoop) {
546557 ++II;
547558 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
785796 };
786797 } // namespace
787798
799 // Hoisting/sinking an instruction out of a loop isn't always beneficial. It's
800 // only worthwhile if the destination block is actually colder than the current
801 // block. Returns false (and emits a missed remark) when DstBlock is too hot.
802 static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
803 OptimizationRemarkEmitter *ORE,
804 BlockFrequencyInfo *BFI) {
805 // Check block frequency only when a runtime profile is available, to avoid
806 // pathological cases. With a static profile, lean towards hoisting because
807 // it helps canonicalize the loop for the vectorizer.
808 if (!DstBlock->getParent()->hasProfileData())
809 return true;
810
811 if (!HoistSinkColdnessThreshold || !BFI)
812 return true;
813
814 BasicBlock *SrcBlock = I.getParent();
815 if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
816 BFI->getBlockFreq(SrcBlock).getFrequency()) {
817 ORE->emit([&]() {
818 return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
819 << "failed to sink or hoist instruction because containing block "
820 "has lower frequency than destination block";
821 });
822 return false;
823 }
824
825 return true;
826 }
827
788828 /// Walk the specified region of the CFG (defined by all blocks dominated by
789829 /// the specified block, and that are in the current loop) in depth first
790830 /// order w.r.t the DominatorTree. This allows us to visit definitions before
791831 /// uses, allowing us to hoist a loop body in one pass without iteration.
792832 ///
793833 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
794 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
834 DominatorTree *DT, BlockFrequencyInfo *BFI,
835 TargetLibraryInfo *TLI, Loop *CurLoop,
795836 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
796837 ICFLoopSafetyInfo *SafetyInfo,
797838 SinkAndHoistLICMFlags &Flags,
842883
843884 // Try hoisting the instruction out to the preheader. We can only do
844885 // this if all of the operands of the instruction are loop invariant and
845 // if it is safe to hoist the instruction.
886 // if it is safe to hoist the instruction. We also check block frequency
887 // to make sure instruction only gets hoisted into colder blocks.
846888 // TODO: It may be safe to hoist if we are hoisting to a conditional block
847889 // and we have accurately duplicated the control flow from the loop header
848890 // to that block.
849891 if (CurLoop->hasLoopInvariantOperands(&I) &&
850892 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
851893 ORE) &&
894 worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
852895 isSafeToExecuteUnconditionally(
853896 I, DT, CurLoop, SafetyInfo, ORE,
854897 CurLoop->getLoopPreheader()->getTerminator())) {
15491592 /// position, and may either delete it or move it to outside of the loop.
15501593 ///
15511594 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1552 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1553 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
1595 BlockFrequencyInfo *BFI, const Loop *CurLoop,
1596 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
1597 OptimizationRemarkEmitter *ORE) {
15541598 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
15551599 ORE->emit([&]() {
15561600 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
16261670 // If this instruction is only used outside of the loop, then all users are
16271671 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
16281672 // the instruction.
1673 // First check if I is worth sinking for all uses. Sink only when it is
1674 // worthwhile across all uses.
16291675 SmallSetVector Users(I.user_begin(), I.user_end());
1676 SmallVector ExitPNs;
16301677 for (auto *UI : Users) {
16311678 auto *User = cast(UI);
16321679
16361683 PHINode *PN = cast(User);
16371684 assert(ExitBlockSet.count(PN->getParent()) &&
16381685 "The LCSSA PHI is not in an exit block!");
1686
1687 if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
1688 return Changed;
1689 }
1690
1691 ExitPNs.push_back(PN);
1692 }
1693
1694 for (auto *PN: ExitPNs) {
16391695 // The PHI must be trivially replaceable.
16401696 Instruction *New = sinkThroughTriviallyReplaceablePHI(
16411697 PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
101101 ; CHECK-NEXT: Scalar Evolution Analysis
102102 ; CHECK-NEXT: Loop Pass Manager
103103 ; CHECK-NEXT: Rotate Loops
104 ; CHECK-NEXT: Branch Probability Analysis
105 ; CHECK-NEXT: Block Frequency Analysis
106 ; CHECK-NEXT: Loop Pass Manager
104107 ; CHECK-NEXT: Loop Invariant Code Motion
108 ; CHECK-NEXT: Loop Pass Manager
105109 ; CHECK-NEXT: Unswitch loops
106110 ; CHECK-NEXT: Simplify the CFG
107111 ; CHECK-NEXT: Dominator Tree Construction
153157 ; CHECK-NEXT: Memory Dependence Analysis
154158 ; CHECK-NEXT: Dead Store Elimination
155159 ; CHECK-NEXT: Natural Loop Information
160 ; CHECK-NEXT: Branch Probability Analysis
161 ; CHECK-NEXT: Block Frequency Analysis
156162 ; CHECK-NEXT: Canonicalize natural loops
157163 ; CHECK-NEXT: LCSSA Verifier
158164 ; CHECK-NEXT: Loop-Closed SSA Form Pass
159165 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
160166 ; CHECK-NEXT: Function Alias Analysis Results
161167 ; CHECK-NEXT: Scalar Evolution Analysis
168 ; CHECK-NEXT: Block Frequency Analysis
162169 ; CHECK-NEXT: Loop Pass Manager
163170 ; CHECK-NEXT: Loop Invariant Code Motion
164171 ; CHECK-NEXT: Post-Dominator Tree Construction
245252 ; CHECK-NEXT: Lazy Block Frequency Analysis
246253 ; CHECK-NEXT: Optimization Remark Emitter
247254 ; CHECK-NEXT: Combine redundant instructions
255 ; CHECK-NEXT: Branch Probability Analysis
256 ; CHECK-NEXT: Block Frequency Analysis
248257 ; CHECK-NEXT: Canonicalize natural loops
249258 ; CHECK-NEXT: LCSSA Verifier
250259 ; CHECK-NEXT: Loop-Closed SSA Form Pass
251260 ; CHECK-NEXT: Scalar Evolution Analysis
261 ; CHECK-NEXT: Block Frequency Analysis
252262 ; CHECK-NEXT: Loop Pass Manager
253263 ; CHECK-NEXT: Loop Invariant Code Motion
254264 ; CHECK-NEXT: Lazy Branch Probability Analysis
106106 ; CHECK-NEXT: Scalar Evolution Analysis
107107 ; CHECK-NEXT: Loop Pass Manager
108108 ; CHECK-NEXT: Rotate Loops
109 ; CHECK-NEXT: Branch Probability Analysis
110 ; CHECK-NEXT: Block Frequency Analysis
111 ; CHECK-NEXT: Loop Pass Manager
109112 ; CHECK-NEXT: Loop Invariant Code Motion
113 ; CHECK-NEXT: Loop Pass Manager
110114 ; CHECK-NEXT: Unswitch loops
111115 ; CHECK-NEXT: Simplify the CFG
112116 ; CHECK-NEXT: Dominator Tree Construction
158162 ; CHECK-NEXT: Memory Dependence Analysis
159163 ; CHECK-NEXT: Dead Store Elimination
160164 ; CHECK-NEXT: Natural Loop Information
165 ; CHECK-NEXT: Branch Probability Analysis
166 ; CHECK-NEXT: Block Frequency Analysis
161167 ; CHECK-NEXT: Canonicalize natural loops
162168 ; CHECK-NEXT: LCSSA Verifier
163169 ; CHECK-NEXT: Loop-Closed SSA Form Pass
164170 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
165171 ; CHECK-NEXT: Function Alias Analysis Results
166172 ; CHECK-NEXT: Scalar Evolution Analysis
173 ; CHECK-NEXT: Block Frequency Analysis
167174 ; CHECK-NEXT: Loop Pass Manager
168175 ; CHECK-NEXT: Loop Invariant Code Motion
169176 ; CHECK-NEXT: Post-Dominator Tree Construction
250257 ; CHECK-NEXT: Lazy Block Frequency Analysis
251258 ; CHECK-NEXT: Optimization Remark Emitter
252259 ; CHECK-NEXT: Combine redundant instructions
260 ; CHECK-NEXT: Branch Probability Analysis
261 ; CHECK-NEXT: Block Frequency Analysis
253262 ; CHECK-NEXT: Canonicalize natural loops
254263 ; CHECK-NEXT: LCSSA Verifier
255264 ; CHECK-NEXT: Loop-Closed SSA Form Pass
256265 ; CHECK-NEXT: Scalar Evolution Analysis
266 ; CHECK-NEXT: Block Frequency Analysis
257267 ; CHECK-NEXT: Loop Pass Manager
258268 ; CHECK-NEXT: Loop Invariant Code Motion
259269 ; CHECK-NEXT: Lazy Branch Probability Analysis
8888 ; CHECK-NEXT: Scalar Evolution Analysis
8989 ; CHECK-NEXT: Loop Pass Manager
9090 ; CHECK-NEXT: Rotate Loops
91 ; CHECK-NEXT: Branch Probability Analysis
92 ; CHECK-NEXT: Block Frequency Analysis
93 ; CHECK-NEXT: Loop Pass Manager
9194 ; CHECK-NEXT: Loop Invariant Code Motion
95 ; CHECK-NEXT: Loop Pass Manager
9296 ; CHECK-NEXT: Unswitch loops
9397 ; CHECK-NEXT: Simplify the CFG
9498 ; CHECK-NEXT: Dominator Tree Construction
140144 ; CHECK-NEXT: Memory Dependence Analysis
141145 ; CHECK-NEXT: Dead Store Elimination
142146 ; CHECK-NEXT: Natural Loop Information
147 ; CHECK-NEXT: Branch Probability Analysis
148 ; CHECK-NEXT: Block Frequency Analysis
143149 ; CHECK-NEXT: Canonicalize natural loops
144150 ; CHECK-NEXT: LCSSA Verifier
145151 ; CHECK-NEXT: Loop-Closed SSA Form Pass
146152 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
147153 ; CHECK-NEXT: Function Alias Analysis Results
148154 ; CHECK-NEXT: Scalar Evolution Analysis
155 ; CHECK-NEXT: Block Frequency Analysis
149156 ; CHECK-NEXT: Loop Pass Manager
150157 ; CHECK-NEXT: Loop Invariant Code Motion
151158 ; CHECK-NEXT: Post-Dominator Tree Construction
232239 ; CHECK-NEXT: Lazy Block Frequency Analysis
233240 ; CHECK-NEXT: Optimization Remark Emitter
234241 ; CHECK-NEXT: Combine redundant instructions
242 ; CHECK-NEXT: Branch Probability Analysis
243 ; CHECK-NEXT: Block Frequency Analysis
235244 ; CHECK-NEXT: Canonicalize natural loops
236245 ; CHECK-NEXT: LCSSA Verifier
237246 ; CHECK-NEXT: Loop-Closed SSA Form Pass
238247 ; CHECK-NEXT: Scalar Evolution Analysis
248 ; CHECK-NEXT: Block Frequency Analysis
239249 ; CHECK-NEXT: Loop Pass Manager
240250 ; CHECK-NEXT: Loop Invariant Code Motion
241251 ; CHECK-NEXT: Lazy Branch Probability Analysis
5151 ; CHECK-O2-NEXT: FunctionPass Manager
5252 ; CHECK-O2-NOT: Manager
5353 ; CHECK-O2: Loop Pass Manager
54 ; CHECK-O2-NOT: Manager
54 ; Requiring block frequency for LICM will place ICM and rotation under separate Loop Pass Managers
5555 ; FIXME: We shouldn't be pulling out to simplify-cfg and instcombine and
5656 ; causing new loop pass managers.
5757 ; CHECK-O2: Simplify the CFG
None ; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM
0 ; RUN: opt -S -licm -licm -licm-coldness-threshold=0 < %s | FileCheck %s --check-prefix=CHECK-LICM
1 ; RUN: opt -S -licm -licm < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
12 ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
23 ; RUN: opt -S < %s -passes='require,loop(licm),loop-sink' \
34 ; RUN: | FileCheck %s --check-prefix=CHECK-SINK
4 ; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
5 ; RUN: opt -S -licm -licm-coldness-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
6 ; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
57
68 ; Original source code:
79 ; int g;
2729 ; CHECK-LICM: .lr.ph.preheader:
2830 ; CHECK-LICM: load i32, i32* @g
2931 ; CHECK-LICM: br label %.lr.ph
32
33 ; CHECK-BFI-LICM: .lr.ph.preheader:
34 ; CHECK-BFI-LICM-NOT: load i32, i32* @g
35 ; CHECK-BFI-LICM: br label %.lr.ph
3036
3137 .lr.ph:
3238 %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ]