llvm.org GIT mirror llvm / 99dd11f
[PM] Split LoopUnrollPass and make partial unroller a function pass Summary: This is largely NFC*, in preparation for utilizing ProfileSummaryInfo and BranchFrequencyInfo analyses. In this patch I am only doing the splitting for the New PM, but I can do the same for the legacy PM as a follow-on if this looks good. *Not NFC since for partial unrolling we lose the updates done to the loop traversal (adding new sibling and child loops) - according to Chandler this is not very useful for partial unrolling, but it also means that the debugging flag -unroll-revisit-child-loops no longer works for partial unrolling. Reviewers: chandlerc Subscribers: mehdi_amini, mzolotukhin, eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D36157 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309886 91177308-0d34-0410-b5e6-96231b3b80d8 Teresa Johnson 2 years ago
19 changed file(s) with 181 addition(s) and 115 deletion(s). Raw diff Collapse all Expand all
1515
1616 namespace llvm {
1717
18 class LoopUnrollPass : public PassInfoMixin {
19 const bool AllowPartialUnrolling;
18 /// Loop unroll pass that only does full loop unrolling.
19 class LoopFullUnrollPass : public PassInfoMixin {
2020 const int OptLevel;
2121
22 explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel)
23 : AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {}
24
2522 public:
26 /// Create an instance of the loop unroll pass that will support both full
27 /// and partial unrolling.
28 ///
29 /// This uses the target information (or flags) to control the thresholds for
30 /// different unrolling strategies but supports all of them.
31 static LoopUnrollPass create(int OptLevel = 2) {
32 return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel);
33 }
34
35 /// Create an instance of the loop unroll pass that only does full loop
36 /// unrolling.
37 ///
38 /// This will disable any runtime or partial unrolling.
39 static LoopUnrollPass createFull(int OptLevel = 2) {
40 return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel);
41 }
23 explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
4224
4325 PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
4426 LoopStandardAnalysisResults &AR, LPMUpdater &U);
4527 };
28
29 /// Loop unroll pass that will support both full and partial unrolling.
30 /// It is a function pass to have access to function and module analyses.
31 /// It will also put loops into canonical form (simplified and LCSSA).
32 class LoopUnrollPass : public PassInfoMixin {
33 const int OptLevel;
34
35 public:
36 /// This uses the target information (or flags) to control the thresholds for
37 /// different unrolling strategies but supports all of them.
38 explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
39
40 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
41 };
4642 } // end namespace llvm
4743
4844 #endif // LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
393393 // inaccurate.
394394 if (Phase != ThinLTOPhase::PreLink ||
395395 !PGOOpt || PGOOpt->SampleProfileFile.empty())
396 LPM2.addPass(LoopUnrollPass::createFull(Level));
396 LPM2.addPass(LoopFullUnrollPass(Level));
397397
398398 for (auto &C : LoopOptimizerEndEPCallbacks)
399399 C(LPM2, Level);
722722 // FIXME: It would be really good to use a loop-integrated instruction
723723 // combiner for cleanup here so that the unrolling and LICM can be pipelined
724724 // across the loop nests.
725 OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
725 OptimizePM.addPass(LoopUnrollPass(Level));
726726 OptimizePM.addPass(InstCombinePass());
727727 OptimizePM.addPass(RequireAnalysisPass());
728728 OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
195195 FUNCTION_PASS("sroa", SROA())
196196 FUNCTION_PASS("tailcallelim", TailCallElimPass())
197197 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
198 FUNCTION_PASS("unroll", LoopUnrollPass())
198199 FUNCTION_PASS("verify", VerifierPass())
199200 FUNCTION_PASS("verify", DominatorTreeVerifierPass())
200201 FUNCTION_PASS("verify", LoopVerifierPass())
226227 LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
227228 LOOP_PASS("strength-reduce", LoopStrengthReducePass())
228229 LOOP_PASS("indvars", IndVarSimplifyPass())
229 LOOP_PASS("unroll", LoopUnrollPass::create())
230 LOOP_PASS("unroll-full", LoopUnrollPass::createFull())
230 LOOP_PASS("unroll-full", LoopFullUnrollPass())
231231 LOOP_PASS("unswitch", SimpleLoopUnswitchPass())
232232 LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
233233 LOOP_PASS("print", IVUsersPrinterPass(dbgs()))
11281128 return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
11291129 }
11301130
1131 PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
1132 LoopStandardAnalysisResults &AR,
1133 LPMUpdater &Updater) {
1131 PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
1132 LoopStandardAnalysisResults &AR,
1133 LPMUpdater &Updater) {
11341134 const auto &FAM =
11351135 AM.getResult(L, AR).getManager();
11361136 Function *F = L.getHeader()->getParent();
11381138 auto *ORE = FAM.getCachedResult(*F);
11391139 // FIXME: This should probably be optional rather than required.
11401140 if (!ORE)
1141 report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
1142 "cached at a higher level");
1141 report_fatal_error(
1142 "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1143 "cached at a higher level");
11431144
11441145 // Keep track of the previous loop structure so we can identify new loops
11451146 // created by unrolling.
11501151 else
11511152 OldLoops.insert(AR.LI.begin(), AR.LI.end());
11521153
1153 // The API here is quite complex to call, but there are only two interesting
1154 // states we support: partial and full (or "simple") unrolling. However, to
1155 // enable these things we actually pass "None" in for the optional to avoid
1156 // providing an explicit choice.
1157 Optional AllowPartialParam, RuntimeParam, UpperBoundParam;
1158 if (!AllowPartialUnrolling)
1159 AllowPartialParam = RuntimeParam = UpperBoundParam = false;
1160 bool Changed = tryToUnrollLoop(
1161 &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1162 /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1163 /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
1154 bool Changed =
1155 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1156 /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1157 /*Threshold*/ None, /*AllowPartial*/ false,
1158 /*Runtime*/ false, /*UpperBound*/ false);
11641159 if (!Changed)
11651160 return PreservedAnalyses::all();
11661161
11711166 #endif
11721167
11731168 // Unrolling can do several things to introduce new loops into a loop nest:
1174 // - Partial unrolling clones child loops within the current loop. If it
1175 // uses a remainder, then it can also create any number of sibling loops.
11761169 // - Full unrolling clones child loops within the current loop but then
11771170 // removes the current loop making all of the children appear to be new
11781171 // sibling loops.
1179 // - Loop peeling can directly introduce new sibling loops by peeling one
1180 // iteration.
11811172 //
1182 // When a new loop appears as a sibling loop, either from peeling an
1183 // iteration or fully unrolling, its nesting structure has fundamentally
1184 // changed and we want to revisit it to reflect that.
1173 // When a new loop appears as a sibling loop after fully unrolling,
1174 // its nesting structure has fundamentally changed and we want to revisit
1175 // it to reflect that.
11851176 //
11861177 // When unrolling has removed the current loop, we need to tell the
11871178 // infrastructure that it is gone.
12121203 } else {
12131204 // We can only walk child loops if the current loop remained valid.
12141205 if (UnrollRevisitChildLoops) {
1215 // Walk *all* of the child loops. This is a highly speculative mode
1216 // anyways so look for any simplifications that arose from partial
1217 // unrolling or peeling off of iterations.
1206 // Walk *all* of the child loops.
12181207 SmallVector ChildLoops(L.begin(), L.end());
12191208 Updater.addChildLoops(ChildLoops);
12201209 }
12221211
12231212 return getLoopPassPreservedAnalyses();
12241213 }
1214
1215 template
1216 static SmallVector appendLoopsToWorklist(RangeT &&Loops) {
1217 SmallVector Worklist;
1218 // We use an internal worklist to build up the preorder traversal without
1219 // recursion.
1220 SmallVector PreOrderLoops, PreOrderWorklist;
1221
1222 for (Loop *RootL : Loops) {
1223 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1224 assert(PreOrderWorklist.empty() &&
1225 "Must start with an empty preorder walk worklist.");
1226 PreOrderWorklist.push_back(RootL);
1227 do {
1228 Loop *L = PreOrderWorklist.pop_back_val();
1229 PreOrderWorklist.append(L->begin(), L->end());
1230 PreOrderLoops.push_back(L);
1231 } while (!PreOrderWorklist.empty());
1232
1233 Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1234 PreOrderLoops.clear();
1235 }
1236 return Worklist;
1237 }
1238
1239 PreservedAnalyses LoopUnrollPass::run(Function &F,
1240 FunctionAnalysisManager &AM) {
1241 auto &SE = AM.getResult(F);
1242 auto &LI = AM.getResult(F);
1243 auto &TTI = AM.getResult(F);
1244 auto &DT = AM.getResult(F);
1245 auto &AC = AM.getResult(F);
1246 auto &ORE = AM.getResult(F);
1247
1248 bool Changed = false;
1249
1250 // The unroller requires loops to be in simplified form, and also needs LCSSA.
1251 // Since simplification may add new inner loops, it has to run before the
1252 // legality and profitability checks. This means running the loop unroller
1253 // will simplify all loops, regardless of whether anything ends up being
1254 // unrolled.
1255 for (auto &L : LI) {
1256 Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1257 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1258 }
1259
1260 SmallVector Worklist = appendLoopsToWorklist(LI);
1261
1262 while (!Worklist.empty()) {
1263 // Because the LoopInfo stores the loops in RPO, we walk the worklist
1264 // from back to front so that we work forward across the CFG, which
1265 // for unrolling is only needed to get optimization remarks emitted in
1266 // a forward order.
1267 Loop &L = *Worklist.pop_back_val();
1268 #ifndef NDEBUG
1269 Loop *ParentL = L.getParentLoop();
1270 #endif
1271
1272 // The API here is quite complex to call, but there are only two interesting
1273 // states we support: partial and full (or "simple") unrolling. However, to
1274 // enable these things we actually pass "None" in for the optional to avoid
1275 // providing an explicit choice.
1276 Optional AllowPartialParam, RuntimeParam, UpperBoundParam;
1277 bool CurChanged = tryToUnrollLoop(
1278 &L, DT, &LI, SE, TTI, AC, ORE,
1279 /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1280 /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
1281 Changed |= CurChanged;
1282
1283 // The parent must not be damaged by unrolling!
1284 #ifndef NDEBUG
1285 if (CurChanged && ParentL)
1286 ParentL->verifyLoop();
1287 #endif
1288 }
1289
1290 if (!Changed)
1291 return PreservedAnalyses::all();
1292
1293 return getLoopPassPreservedAnalyses();
1294 }
139139 ; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass
140140 ; CHECK-EP-LOOP-LATE-NEXT: Running pass: NoOpLoopPass
141141 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
142 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
142 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
143143 ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass
144144 ; CHECK-O-NEXT: Finished Loop pass manager run.
145145 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
196196 ; CHECK-O-NEXT: Running pass: SLPVectorizerPass
197197 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
198198 ; CHECK-O-NEXT: Running pass: InstCombinePass
199 ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopUnrollPass
199 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
200200 ; CHECK-O-NEXT: Running pass: InstCombinePass
201201 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
202202 ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass
131131 ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass
132132 ; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass
133133 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
134 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
134 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
135135 ; CHECK-O-NEXT: Finished Loop pass manager run.
136136 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
137137 ; CHECK-Os-NEXT: Running pass: GVN
183183 ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
184184 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
185185 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
186 ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopUnrollPass
186 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
187187 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
188188 ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
189189 ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass
99 ; preprocess the test.
1010
1111 ; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
12 ; RUN: opt < %s -passes='function(require,loop(unroll)),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
12 ; RUN: opt < %s -passes='function(require,unroll),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
1313 ; CHECK-LABEL: define internal i32 @bar()
1414
1515 define internal i32 @baz() {
11 ; to the internal functions are cold, thereby preventing the last call to
22 ; static bonus from being applied.
33
4 ; RUN: opt < %s -passes='function(require,loop(unroll)),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
4 ; RUN: opt < %s -passes='function(require,unroll),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
55
66 ; CHECK-LABEL: define internal i32 @baz
77 define internal i32 @baz() {
0 ; RUN: opt < %s -loop-unroll -S | FileCheck %s
1 ; RUN: opt < %s -passes='require,loop(unroll)' -S | FileCheck %s
1 ; RUN: opt < %s -passes='require,unroll' -S | FileCheck %s
22
33
44 ; This should not unroll since the address of the loop header is taken.
2525 ; RUN: opt < %s -S -passes='require,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
2626
2727 ; Check that these work when the unroller has partial unrolling enabled too.
28 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
29 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
30 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
28 ; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
29 ; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
30 ; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
3131
3232 ; If the absolute threshold is too low, we should not unroll:
3333 ; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
0 ; RUN: opt < %s -S -unroll-partial-threshold=20 -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
1 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
1 ; RUN: opt < %s -S -passes='require,unroll' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
22 ;
33 ; Also check that the simple unroller doesn't allow the partial unrolling.
44 ; RUN: opt < %s -S -passes='require,loop(unroll-full)' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s --check-prefix=CHECK-NO-UNROLL
33 ; current two cases.
44 ;
55 ; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
6 ; RUN: -passes='require,loop(unroll)' \
6 ; RUN: -passes='require,loop(unroll-full)' \
77 ; RUN: | FileCheck %s
88 ;
99 ; Also run in a special mode that visits children.
1010 ; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
11 ; RUN: -passes='require,loop(unroll)' \
11 ; RUN: -passes='require,loop(unroll-full)' \
1212 ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
1313
1414 ; Basic test is fully unrolled and we revisit the post-unroll new sibling
1515 ; loops, including the ones that used to be child loops.
1616 define void @full_unroll(i1* %ptr) {
1717 ; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll
18 ; CHECK-NOT: LoopUnrollPass
18 ; CHECK-NOT: LoopFullUnrollPass
1919
2020 entry:
2121 br label %l0
3838 l0.0.0:
3939 %cond.0.0.0 = load volatile i1, i1* %ptr
4040 br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
41 ; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
42 ; CHECK-NOT: LoopUnrollPass
41 ; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
42 ; CHECK-NOT: LoopFullUnrollPass
4343
4444 l0.0.1.ph:
4545 br label %l0.0.1
4747 l0.0.1:
4848 %cond.0.0.1 = load volatile i1, i1* %ptr
4949 br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
50 ; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
51 ; CHECK-NOT: LoopUnrollPass
50 ; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
51 ; CHECK-NOT: LoopFullUnrollPass
5252
5353 l0.0.latch:
5454 %cmp = icmp slt i32 %iv.next, 2
5555 br i1 %cmp, label %l0.0, label %l0.latch
56 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
57 ; CHECK-NOT: LoopUnrollPass
56 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
57 ; CHECK-NOT: LoopFullUnrollPass
5858 ;
5959 ; Unrolling occurs, so we visit what were the inner loops twice over. First we
6060 ; visit their clones, and then we visit the original loops re-parented.
61 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.1
62 ; CHECK-NOT: LoopUnrollPass
63 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.1
64 ; CHECK-NOT: LoopUnrollPass
65 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1
66 ; CHECK-NOT: LoopUnrollPass
67 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0
68 ; CHECK-NOT: LoopUnrollPass
61 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.1
62 ; CHECK-NOT: LoopFullUnrollPass
63 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.1
64 ; CHECK-NOT: LoopFullUnrollPass
65 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1
66 ; CHECK-NOT: LoopFullUnrollPass
67 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0
68 ; CHECK-NOT: LoopFullUnrollPass
6969
7070 l0.latch:
7171 br label %l0
72 ; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0
73 ; CHECK-NOT: LoopUnrollPass
72 ; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0
73 ; CHECK-NOT: LoopFullUnrollPass
7474
7575 exit:
7676 ret void
8181 ; default visited, but will be visited with a special parameter.
8282 define void @partial_unroll(i32 %count, i1* %ptr) {
8383 ; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll
84 ; CHECK-NOT: LoopUnrollPass
84 ; CHECK-NOT: LoopFullUnrollPass
8585
8686 entry:
8787 br label %l0
104104 l0.0.0:
105105 %cond.0.0.0 = load volatile i1, i1* %ptr
106106 br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
107 ; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
108 ; CHECK-NOT: LoopUnrollPass
107 ; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
108 ; CHECK-NOT: LoopFullUnrollPass
109109
110110 l0.0.1.ph:
111111 br label %l0.0.1
113113 l0.0.1:
114114 %cond.0.0.1 = load volatile i1, i1* %ptr
115115 br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
116 ; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
117 ; CHECK-NOT: LoopUnrollPass
116 ; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
117 ; CHECK-NOT: LoopFullUnrollPass
118118
119119 l0.0.latch:
120120 %cmp = icmp slt i32 %iv.next, %count
121121 br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
122 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
123 ; CHECK-NOT: LoopUnrollPass
122 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
123 ; CHECK-NOT: LoopFullUnrollPass
124124 ;
125125 ; Partial unrolling occurs which introduces both new child loops and new sibling
126126 ; loops. We only visit the child loops in a special mode, not by default.
127 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
128 ; CHECK-CHILDREN-NOT: LoopUnrollPass
129 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
130 ; CHECK-CHILDREN-NOT: LoopUnrollPass
131 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0.1
132 ; CHECK-CHILDREN-NOT: LoopUnrollPass
133 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1.1
134 ; CHECK-CHILDREN-NOT: LoopUnrollPass
127 ; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
128 ; CHECK-CHILDREN-NOT: LoopFullUnrollPass
129 ; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
130 ; CHECK-CHILDREN-NOT: LoopFullUnrollPass
131 ; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0.1
132 ; CHECK-CHILDREN-NOT: LoopFullUnrollPass
133 ; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1.1
134 ; CHECK-CHILDREN-NOT: LoopFullUnrollPass
135135 ;
136136 ; When we revisit children, we also revisit the current loop.
137 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0
138 ; CHECK-CHILDREN-NOT: LoopUnrollPass
137 ; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
138 ; CHECK-CHILDREN-NOT: LoopFullUnrollPass
139139 ;
140140 ; Revisit the children of the outer loop that are part of the epilogue.
141141 ;
142 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
143 ; CHECK-NOT: LoopUnrollPass
144 ; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.epil
145 ; CHECK-NOT: LoopUnrollPass
142 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
143 ; CHECK-NOT: LoopFullUnrollPass
144 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.epil
145 ; CHECK-NOT: LoopFullUnrollPass
146146 l0.latch:
147147 br label %l0
148 ; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0
149 ; CHECK-NOT: LoopUnrollPass
148 ; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0
149 ; CHECK-NOT: LoopFullUnrollPass
150150
151151 exit:
152152 ret void
0 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
11 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
22
3 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
3 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
55
66 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
77
0 ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
11 ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
22
3 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
3 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
55
66 ; This tests that setting the unroll count works
77
0 ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
11 ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
22
3 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
3 ; RUN: opt < %s -S -passes='require,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
4 ; RUN: opt < %s -S -passes='require,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
55
66 ; Choose a smaller, power-of-two, unroll count if the loop is too large.
77 ; This test makes sure we're not unrolling 'odd' counts
0 ; REQUIRES: asserts
11 ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
2 ; RUN: opt < %s -disable-output -stats -passes='require,loop(unroll)' -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
2 ; RUN: opt < %s -disable-output -stats -passes='require,unroll' -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
33
44 ; Test that nested loops can be unrolled. We need to increase threshold to do it
55
0 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
11 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
22
3 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
4 ; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
3 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
4 ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
55
66 ; Given that the trip-count of this loop is a 3-bit value, we cannot
77 ; safely unroll it with a count of anything more than 8.
0 ; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
1 ; RUN: opt < %s -S -passes='require,loop(unroll),verify' | FileCheck %s
1 ; RUN: opt < %s -S -passes='require,unroll,verify' | FileCheck %s
22 ;
33 ; Unit tests for LoopInfo::markAsRemoved.
44
0 ; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
1 ; RUN: opt -S < %s -passes='require,loop(unroll),require' | FileCheck %s
1 ; RUN: opt -S < %s -passes='require,unroll,require' | FileCheck %s
22 ; Crasher from PR20987.
33
44 ; CHECK: define void @update_loop_info_in_subloops