llvm.org GIT mirror llvm / 214aad9
Add new llvm.loop.unroll.enable metadata. This change adds the unroll metadata "llvm.loop.unroll.enable" which directs the optimizer to unroll a loop fully if the trip count is known at compile time, and unroll partially if the trip count is not known at compile time. This differs from "llvm.loop.unroll.full" which explicitly does not unroll a loop if the trip count is not known at compile time. The "llvm.loop.unroll.enable" is intended to be added for loops annotated with "#pragma unroll". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244466 91177308-0d34-0410-b5e6-96231b3b80d8 Mark Heffernan 4 years ago
3 changed file(s) with 118 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
42784278
42794279 !0 = !{!"llvm.loop.unroll.runtime.disable"}
42804280
4281 '``llvm.loop.unroll.enable``' Metadata
4282 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4283
4284 This metadata suggests that the loop should be fully unrolled if the trip count
4285 is known at compile time and partially unrolled if the trip count is not known
4286 at compile time. The metadata has a single operand which is the string
4287 ``llvm.loop.unroll.enable``. For example:
4288
4289 .. code-block:: llvm
4290
4291 !0 = !{!"llvm.loop.unroll.enable"}
4292
42814293 '``llvm.loop.unroll.full``' Metadata
42824294 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
42834295
186186 // total unrolled size. Parameters Threshold and PartialThreshold
187187 // are set to the maximum unrolled size for fully and partially
188188 // unrolled loops respectively.
189 void selectThresholds(const Loop *L, bool HasPragma,
189 void selectThresholds(const Loop *L, bool UsePragmaThreshold,
190190 const TargetTransformInfo::UnrollingPreferences &UP,
191191 unsigned &Threshold, unsigned &PartialThreshold,
192192 unsigned &PercentDynamicCostSavedThreshold,
213213 Threshold = UP.OptSizeThreshold;
214214 PartialThreshold = UP.PartialOptSizeThreshold;
215215 }
216 if (HasPragma) {
216 if (UsePragmaThreshold) {
217217 // If the loop has an unrolling pragma, we want to be more
218218 // aggressive with unrolling limits. Set thresholds to at
219219 // least the PragmaThreshold value which is larger than the
725725 return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
726726 }
727727
728 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
729 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
730 static bool HasUnrollEnablePragma(const Loop *L) {
731 return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
732 }
733
728734 // Returns true if the loop has an unroll(disable) pragma.
729735 static bool HasUnrollDisablePragma(const Loop *L) {
730736 return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
900906 return false;
901907 }
902908 bool PragmaFullUnroll = HasUnrollFullPragma(L);
909 bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
903910 unsigned PragmaCount = UnrollCountPragmaValue(L);
904 bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
911 bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
905912
906913 TargetTransformInfo::UnrollingPreferences UP;
907914 getUnrollingPreferences(L, TTI, UP);
949956 unsigned Threshold, PartialThreshold;
950957 unsigned PercentDynamicCostSavedThreshold;
951958 unsigned DynamicCostSavingsDiscount;
952 selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
959 // Only use the high pragma threshold when we have a target unroll factor such
960 // as with "#pragma unroll N" or a pragma indicating full unrolling and the
961 // trip count is known. Otherwise we rely on the standard threshold to
962 // heuristically select a reasonable unroll count.
963 bool UsePragmaThreshold =
964 PragmaCount > 0 ||
965 ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
966
967 selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
953968 PercentDynamicCostSavedThreshold,
954969 DynamicCostSavingsDiscount);
955970
984999
9851000 // Reduce count based on the type of unrolling and the threshold values.
9861001 unsigned OriginalCount = Count;
987 bool AllowRuntime =
988 (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
1002 bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
1003 (UserRuntime ? CurrentRuntime : UP.Runtime);
9891004 // Don't unroll a runtime trip count loop with unroll full pragma.
9901005 if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
9911006 AllowRuntime = false;
9921007 }
9931008 if (Unrolling == Partial) {
994 bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
1009 bool AllowPartial = PragmaEnableUnroll ||
1010 (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
9951011 if (!AllowPartial && !CountSetExplicitly) {
9961012 DEBUG(dbgs() << " will not try to unroll partially because "
9971013 << "-unroll-allow-partial not given\n");
10311047 DebugLoc LoopLoc = L->getStartLoc();
10321048 Function *F = Header->getParent();
10331049 LLVMContext &Ctx = F->getContext();
1034 if (PragmaFullUnroll && PragmaCount == 0) {
1035 if (TripCount && Count != TripCount) {
1036 emitOptimizationRemarkMissed(
1037 Ctx, DEBUG_TYPE, *F, LoopLoc,
1038 "Unable to fully unroll loop as directed by unroll(full) pragma "
1039 "because unrolled size is too large.");
1040 } else if (!TripCount) {
1041 emitOptimizationRemarkMissed(
1042 Ctx, DEBUG_TYPE, *F, LoopLoc,
1043 "Unable to fully unroll loop as directed by unroll(full) pragma "
1044 "because loop has a runtime trip count.");
1045 }
1046 } else if (PragmaCount > 0 && Count != OriginalCount) {
1050 if ((PragmaCount > 0) && Count != OriginalCount) {
10471051 emitOptimizationRemarkMissed(
10481052 Ctx, DEBUG_TYPE, *F, LoopLoc,
10491053 "Unable to unroll loop the number of times directed by "
10501054 "unroll_count pragma because unrolled size is too large.");
1055 } else if (PragmaFullUnroll && !TripCount) {
1056 emitOptimizationRemarkMissed(
1057 Ctx, DEBUG_TYPE, *F, LoopLoc,
1058 "Unable to fully unroll loop as directed by unroll(full) pragma "
1059 "because loop has a runtime trip count.");
1060 } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
1061 emitOptimizationRemarkMissed(
1062 Ctx, DEBUG_TYPE, *F, LoopLoc,
1063 "Unable to unroll loop as directed by unroll(enable) pragma because "
1064 "unrolled size is too large.");
1065 } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1066 Count != TripCount) {
1067 emitOptimizationRemarkMissed(
1068 Ctx, DEBUG_TYPE, *F, LoopLoc,
1069 "Unable to fully unroll loop as directed by unroll pragma because "
1070 "unrolled size is too large.");
10511071 }
10521072 }
10531073
255255 ret void
256256 }
257257 !12 = !{!12, !4}
258
259 ; #pragma clang loop unroll(enable)
260 ; Loop should be fully unrolled.
261 ;
262 ; CHECK-LABEL: @loop64_with_enable(
263 ; CHECK-NOT: br i1
264 define void @loop64_with_enable(i32* nocapture %a) {
265 entry:
266 br label %for.body
267
268 for.body: ; preds = %for.body, %entry
269 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
270 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
271 %0 = load i32, i32* %arrayidx, align 4
272 %inc = add nsw i32 %0, 1
273 store i32 %inc, i32* %arrayidx, align 4
274 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
275 %exitcond = icmp eq i64 %indvars.iv.next, 64
276 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13
277
278 for.end: ; preds = %for.body
279 ret void
280 }
281 !13 = !{!13, !14}
282 !14 = !{!"llvm.loop.unroll.enable"}
283
284 ; #pragma clang loop unroll(enable)
285 ; Loop has a runtime trip count and should be runtime unrolled and duplicated
286 ; (original and 8x).
287 ;
288 ; CHECK-LABEL: @runtime_loop_with_enable(
289 ; CHECK: for.body.prol:
290 ; CHECK: store
291 ; CHECK-NOT: store
292 ; CHECK: br i1
293 ; CHECK: for.body:
294 ; CHECK: store i32
295 ; CHECK: store i32
296 ; CHECK: store i32
297 ; CHECK: store i32
298 ; CHECK: store i32
299 ; CHECK: store i32
300 ; CHECK: store i32
301 ; CHECK: store i32
302 ; CHECK-NOT: store i32
303 ; CHECK: br i1
304 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
305 entry:
306 %cmp3 = icmp sgt i32 %b, 0
307 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
308
309 for.body: ; preds = %entry, %for.body
310 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
311 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
312 %0 = load i32, i32* %arrayidx, align 4
313 %inc = add nsw i32 %0, 1
314 store i32 %inc, i32* %arrayidx, align 4
315 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
316 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
317 %exitcond = icmp eq i32 %lftr.wideiv, %b
318 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
319
320 for.end: ; preds = %for.body, %entry
321 ret void
322 }
323 !15 = !{!15, !14}